@joliegg/moderation 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@joliegg/moderation",
3
+ "version": "0.3.0",
4
+ "description": "A set of tools for chat moderation",
5
+ "author": "Diana Islas Ocampo",
6
+ "main": "dist/index.js",
7
+ "license": "HL3-CL",
8
+ "scripts": {
9
+ "publish": "yarn build && yarn docs",
10
+ "build": "yarn eslint . && rm -rf ./dist && yarn tsc --declaration",
11
+ "docs": "typedoc",
12
+ "test": "node test/index.js"
13
+ },
14
+ "engines": {
15
+ "node": "20.x"
16
+ },
17
+ "devDependencies": {
18
+ "@babel/eslint-parser": "^7.24.8",
19
+ "@eslint/js": "^9.7.0",
20
+ "@typescript-eslint/eslint-plugin": "^7.16.0",
21
+ "@typescript-eslint/parser": "^7.16.0",
22
+ "dotenv": "^16.4.5",
23
+ "eslint": "^9.7.0",
24
+ "typedoc": "^0.26.4",
25
+ "typescript": "^5.5.3",
26
+ "typescript-eslint": "^7.16.0"
27
+ },
28
+ "dependencies": {
29
+ "@aws-sdk/client-rekognition": "^3.614.0",
30
+ "@google-cloud/language": "^6.4.0",
31
+ "@google-cloud/speech": "^6.6.1",
32
+ "axios": "^1.7.2",
33
+ "sharp": "^0.32.6"
34
+ },
35
+ "files": [
36
+ "README.md",
37
+ "LICENSE.md",
38
+ "package.json",
39
+ "src/*",
40
+ "dist/*"
41
+ ],
42
+ "packageManager": "yarn@4.3.1"
43
+ }
package/src/index.ts ADDED
@@ -0,0 +1,218 @@
1
+ import axios from 'axios';
2
+
3
+ import { Rekognition } from '@aws-sdk/client-rekognition';
4
+ import { LanguageServiceClient } from '@google-cloud/language';
5
+ import { SpeechClient, protos } from '@google-cloud/speech';
6
+
7
+ import sharp from 'sharp';
8
+
9
+ import URLBlackList from './url-blacklist.json';
10
+
11
+
12
+ import { ModerationCategory, ModerationConfiguration, ModerationResult, ThreatsResponse } from './types';
13
+
14
+
15
+ type IRecognitionConfig = protos.google.cloud.speech.v1.IRecognitionConfig;
16
+
17
+ type ISpeechRecognitionResult = protos.google.cloud.speech.v1.ISpeechRecognitionResult;
18
+
19
/**
 * Moderation Client
 *
 * Bundles several optional moderation backends behind one object. Each
 * backend is only enabled when the matching section of the configuration is
 * present; a method whose backend is not configured resolves to a result
 * with an empty `moderation` list (or, for text, to the ban-list matches only).
 *
 * @name ModerationClient
 */
class ModerationClient {

  private rekognitionClient?: Rekognition;
  private googleLanguageClient?: LanguageServiceClient;
  private googleSpeechClient?: SpeechClient;
  private googleAPIKey?: string;
  private banList?: string[];

  /**
   * Creates the backend clients that the configuration enables.
   *
   * @param {ModerationConfiguration} configuration `aws` enables image moderation,
   * `google.keyFile` enables text and audio moderation, `google.apiKey` enables
   * the remote link lookup and `banList` enables the local word list.
   */
  constructor (configuration: ModerationConfiguration) {
    if (configuration.aws) {
      this.rekognitionClient = new Rekognition(configuration.aws);
    }

    if (typeof configuration.google?.keyFile === 'string') {
      this.googleLanguageClient = new LanguageServiceClient({ keyFile: configuration.google.keyFile });
      this.googleSpeechClient = new SpeechClient({ keyFile: configuration.google.keyFile });
    }

    if (typeof configuration.google?.apiKey === 'string') {
      this.googleAPIKey = configuration.google.apiKey;
    }

    if (Array.isArray(configuration.banList)) {
      this.banList = configuration.banList;
    }
  }

  /**
   * Returns a list of moderation categories detected on a text
   *
   * The ban list (when configured) is checked first with a case-insensitive
   * substring match. A single `Ban List` category is reported whose
   * `confidence` is the number of ban-list entries found in the text; it is
   * NOT a percentage and is not filtered by `minimumConfidence`.
   *
   * @param {string} text The text to moderate
   * @param {number} [minimumConfidence = 50] The minimum confidence (0-100) required for a remote category to be considered
   *
   * @returns {Promise<ModerationResult>} The ban-list match (if any) followed by the remote categories that reached the minimum confidence
   */
  async moderateText (text: string, minimumConfidence: number = 50): Promise<ModerationResult> {
    const categories: ModerationCategory[] = [];

    if (Array.isArray(this.banList)) {
      const normalizedText = text.toLowerCase();

      // Lower-case both sides so entries such as "Foo" still match, and skip
      // empty entries, which would otherwise match every text.
      const matches = this.banList.filter(
        w => typeof w === 'string' && w.length > 0 && normalizedText.includes(w.toLowerCase())
      );

      if (matches.length > 0) {
        categories.push({
          category: 'Ban List',
          confidence: matches.length,
        });
      }
    }

    if (typeof this.googleLanguageClient === 'undefined') {
      return { source: text, moderation: categories };
    }

    const [ result ] = await this.googleLanguageClient.moderateText({
      document: {
        content: text,
        type: 'PLAIN_TEXT',
      },
    });

    if (result && Array.isArray(result.moderationCategories)) {
      // The remote confidence is scaled by 100 so it is comparable with minimumConfidence.
      const results = result.moderationCategories
        .map(c => ({
          category: c.name ?? 'Unknown',
          confidence: (c.confidence ?? 0) * 100,
        }))
        .filter(c => c.confidence >= minimumConfidence);

      return { source: text, moderation: [...categories, ...results] };
    }

    // No usable remote answer: keep whatever the ban list found instead of dropping it.
    return { source: text, moderation: categories };
  }

  /**
   * Returns a list of moderation categories detected on an image
   *
   * The image is downloaded and sent to Rekognition as raw bytes. URLs
   * containing `.gif` or `.webp` are converted to PNG with sharp first.
   *
   * @param {string} url The URL of the image to download and moderate
   * @param {number} [minimumConfidence = 95] The minimum confidence required for a category to be considered
   *
   * @returns {Promise<ModerationResult>} The labels that were detected with the minimum confidence specified
   */
  async moderateImage (url: string, minimumConfidence: number = 95): Promise<ModerationResult> {
    if (typeof this.rekognitionClient === 'undefined') {
      return { source: url, moderation: [] };
    }

    // With responseType 'arraybuffer' the body is binary data, not a string.
    const { data } = await axios.get<ArrayBuffer>(url, { responseType: 'arraybuffer' });
    const downloaded = Buffer.from(data);

    const normalizedUrl = url.toLowerCase();

    let buffer: Buffer;

    if (normalizedUrl.includes('.gif')) {
      // `pages: -1` makes sharp read every frame; they are encoded into one PNG.
      buffer = await sharp(downloaded, { pages: -1 }).toFormat('png').toBuffer();
    } else if (normalizedUrl.includes('.webp')) {
      buffer = await sharp(downloaded).toFormat('png').toBuffer();
    } else {
      // Any other format is forwarded untouched.
      buffer = downloaded;
    }

    const { ModerationLabels } = await this.rekognitionClient.detectModerationLabels({
      Image: {
        Bytes: buffer
      },
      MinConfidence: minimumConfidence
    });

    if (Array.isArray(ModerationLabels)) {
      const moderation = ModerationLabels.map(l => ({
        category: l.Name ?? 'Unknown',
        confidence: l.Confidence ?? 0,
      }));

      return { source: url, moderation };
    }

    return { source: url, moderation: [] };
  }

  /**
   * Returns a list of threats detected for a link
   *
   * The bundled URL blacklist is checked first (case-insensitive substring
   * match). When it does not match and a Google API key is configured, the
   * URL is looked up through the Web Risk `uris:search` endpoint.
   *
   * @param {string} url The link to check
   *
   * @returns {Promise<ModerationResult>} A `BLACK_LIST` category, or one category per reported threat type, always with confidence 100
   */
  async moderateLink (url: string): Promise<ModerationResult> {
    // Compare in lower case so changing the letter case of a URL does not bypass the list.
    const normalizedUrl = url.toLowerCase();
    const blacklisted = URLBlackList.some(b => normalizedUrl.includes(b.toLowerCase()));

    if (blacklisted) {
      return { source: url, moderation: [{ category: 'BLACK_LIST', confidence: 100 }] };
    }

    if (typeof this.googleAPIKey !== 'string') {
      return { source: url, moderation: [] };
    }

    const types = [
      'MALWARE',
      'SOCIAL_ENGINEERING',
      'UNWANTED_SOFTWARE',
      'SOCIAL_ENGINEERING_EXTENDED_COVERAGE'
    ];

    // The endpoint takes the threat types as a repeated `threatTypes` query parameter.
    const threatTypes = types.join('&threatTypes=');
    const requestUrl = `https://webrisk.googleapis.com/v1/uris:search?threatTypes=${threatTypes}&key=${encodeURIComponent(this.googleAPIKey)}`;

    const { data } = await axios.get<ThreatsResponse>(`${requestUrl}&uri=${encodeURIComponent(url)}`);

    const threats = data?.threat?.threatTypes;

    if (Array.isArray(threats)) {
      const moderation = threats.map(t => ({
        category: t,
        confidence: 100,
      }));

      return { source: url, moderation };
    }

    return { source: url, moderation: [] };
  }

  /**
   * Transcribes an audio file and moderates the resulting text
   *
   * The audio is downloaded, sent to the speech client as `OGG_OPUS` at
   * 48000 Hz, and the first alternative of every recognition result is
   * joined into one transcript that is passed to `moderateText`.
   *
   * Note that when a transcript is produced, the `source` of the returned
   * result is the transcript (as returned by `moderateText`), not the URL.
   *
   * @param {string} url The URL of the audio file to download and moderate
   * @param {number} [minimumConfidence = 50] The minimum confidence forwarded to `moderateText`
   * @param {string} [languageCode = 'es-US'] The language code sent to the speech recognizer
   *
   * @returns {Promise<ModerationResult>} The text moderation result of the transcript, or an empty result for the URL when nothing was transcribed
   */
  async moderateAudio (url: string, minimumConfidence: number = 50, languageCode: string = 'es-US'): Promise<ModerationResult> {
    if (typeof this.googleSpeechClient === 'undefined') {
      return { source: url, moderation: [] };
    }

    // With responseType 'arraybuffer' the body is binary data, not a string.
    const { data } = await axios.get<ArrayBuffer>(url, { responseType: 'arraybuffer' });

    const options: IRecognitionConfig = {
      encoding: 'OGG_OPUS',
      sampleRateHertz: 48000,
      languageCode
    };

    const [ response ] = await this.googleSpeechClient.recognize({
      audio: { content: Buffer.from(data).toString('base64') },
      config: options,
    });

    if (!Array.isArray(response?.results)) {
      return { source: url, moderation: [] };
    }

    const transcription = response.results
      .map((result: ISpeechRecognitionResult) => result.alternatives?.at(0)?.transcript ?? '')
      .join(' ');

    // Nothing was recognised, so there is no text worth sending to moderation.
    if (transcription.trim().length === 0) {
      return { source: url, moderation: [] };
    }

    return this.moderateText(transcription, minimumConfidence);
  }

}
217
+
218
+ export default ModerationClient;
@@ -0,0 +1,31 @@
1
+ import { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
2
+
3
/**
 * Options accepted by the moderation client. Every section is optional.
 */
export interface ModerationConfiguration {
  /** Rekognition client configuration. */
  aws?: RekognitionClientConfig;
  /** Google credentials. */
  google?: {
    /** API key for Google services that are called with a key. */
    apiKey?: string;
    /** Path to a key file for the Google Cloud clients — presumably a service-account JSON file; confirm. */
    keyFile?: string;
  };
  /** Words or phrases to look for in moderated text. */
  banList?: string[];
}
11
+
12
/**
 * A single category reported by a moderation check.
 */
export interface ModerationCategory {
  /** Name of the detected category. */
  category: string;
  /**
   * Score attached to the category.
   * NOTE(review): the scale depends on which check produced it — confirm
   * against the producer before comparing values from different checks.
   */
  confidence: number;
}
16
/**
 * Outcome of moderating one piece of content.
 */
export interface ModerationResult {
  /** The content the result refers to (a text or a URL). */
  source: string;
  /** Categories that were detected; empty when nothing was found. */
  moderation: ModerationCategory[];
}
20
+
21
/**
 * A banned word paired with a category.
 * NOTE(review): its intended use is not evident from the declaration alone — confirm.
 */
export interface BannedWord {
  /** The banned word. */
  word: string;
  /** The category associated with the word. */
  category: string;
}
25
+
26
/**
 * Shape of a threat lookup response.
 */
export interface ThreatsResponse {
  /** Threat details; optional — presumably absent when no threat was found (confirm against the API). */
  threat?: {
    /** Threat types reported for the looked-up URL. */
    threatTypes: string[];
    /** Expiry of the verdict — presumably a timestamp string; confirm format against the API. */
    expireTime: string;
  };
}