@joliegg/moderation 0.4.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE.md +21 -118
  2. package/README.md +4 -6
  3. package/dist/actions.d.ts +28 -0
  4. package/dist/actions.js +48 -0
  5. package/dist/client.d.ts +19 -0
  6. package/dist/client.js +97 -0
  7. package/dist/{url-blacklist.json → data/url-blacklist.json} +1 -0
  8. package/dist/index.d.ts +3 -41
  9. package/dist/index.js +20 -213
  10. package/dist/providers/aws.d.ts +11 -0
  11. package/dist/providers/aws.js +58 -0
  12. package/dist/providers/google.d.ts +21 -0
  13. package/dist/providers/google.js +61 -0
  14. package/dist/providers/webrisk.d.ts +9 -0
  15. package/dist/providers/webrisk.js +33 -0
  16. package/dist/raid/age.d.ts +6 -0
  17. package/dist/raid/age.js +19 -0
  18. package/dist/raid/detector.d.ts +56 -0
  19. package/dist/raid/detector.js +88 -0
  20. package/dist/raid/index.d.ts +2 -0
  21. package/dist/raid/index.js +18 -0
  22. package/dist/spam/cache.d.ts +99 -0
  23. package/dist/spam/cache.js +210 -0
  24. package/dist/spam/index.d.ts +1 -0
  25. package/dist/spam/index.js +17 -0
  26. package/dist/text/index.d.ts +2 -0
  27. package/dist/text/index.js +18 -0
  28. package/dist/text/mentions.d.ts +31 -0
  29. package/dist/text/mentions.js +55 -0
  30. package/dist/text/normalize.d.ts +15 -0
  31. package/dist/text/normalize.js +45 -0
  32. package/dist/types/config.d.ts +13 -0
  33. package/dist/types/config.js +2 -0
  34. package/dist/types/index.d.ts +3 -10
  35. package/dist/types/index.js +15 -0
  36. package/package.json +61 -20
  37. package/src/actions.ts +50 -0
  38. package/src/client.ts +121 -0
  39. package/src/{url-blacklist.json → data/url-blacklist.json} +1 -0
  40. package/src/index.ts +3 -277
  41. package/src/providers/aws.ts +58 -0
  42. package/src/providers/google.ts +63 -0
  43. package/src/providers/webrisk.ts +30 -0
  44. package/src/raid/age.ts +19 -0
  45. package/src/raid/detector.ts +122 -0
  46. package/src/raid/index.ts +2 -0
  47. package/src/spam/cache.ts +342 -0
  48. package/src/spam/index.ts +1 -0
  49. package/src/text/index.ts +2 -0
  50. package/src/text/mentions.ts +91 -0
  51. package/src/text/normalize.ts +43 -0
  52. package/src/types/config.ts +14 -0
  53. package/src/types/index.ts +5 -11
  54. /package/dist/{url-shorteners.json → data/url-shorteners.json} +0 -0
  55. /package/src/{url-shorteners.json → data/url-shorteners.json} +0 -0
@@ -0,0 +1,122 @@
1
/** Tuning options for `RaidDetector`. Every field may be omitted. */
export interface RaidDetectorOptions {
  /**
   * How many joins inside the window count as a raid. Defaults to 10, which
   * fits a very small community; the right value depends on community size
   * and activity.
   */
  joinThreshold?: number;

  /** Length of the sliding window, in seconds. Defaults to 60 (one minute). */
  joinWindow?: number;
}

/** A member join as reported by the caller. */
export interface MemberJoin {
  memberId: string;
  joinedTimestamp: number;
  createdTimestamp: number;
}

/** Outcome of recording one join. */
export interface RaidTrackResult {
  /** True when the joins currently in the window reach the threshold. */
  isRaid: boolean;
  /** Number of joins currently inside the window, including this one. */
  joinCount: number;
  /** The configured window length, in seconds. */
  windowSeconds: number;
}

/** Possible results of an attempt to switch a guild into raid mode. */
export type EnableResult = 'enabled' | 'already_active' | 'already_enabling';

/** One recorded join kept in a guild's sliding window. */
interface JoinEvent {
  memberId: string;
  timestamp: number;
}

/** Per-guild bookkeeping: the recent joins and the raid-mode flag. */
interface GuildState {
  joins: JoinEvent[];
  raidActive: boolean;
}
32
/**
 * Platform-agnostic raid detector. For each guild it keeps the joins
 * recorded during the most recent sliding window and reports a "raid"
 * signal once the number of joins in that window reaches the configured
 * threshold.
 *
 * The detector owns the sliding-window state and the per-guild
 * raid-active flag. It does NOT own enforcement: callers decide what to
 * do when `isRaid` is true (timeout, kick, auto-disable timer,
 * mod-channel alert, etc.).
 *
 * `tryEnable` checks and sets the flag without awaiting anything in
 * between, so overlapping callers during a join burst cannot both
 * receive `'enabled'` for the same guild.
 */
export class RaidDetector {
  /** Number of joins inside the window that constitutes a raid. */
  readonly joinThreshold: number;
  /** Sliding window length in milliseconds (the option is given in seconds). */
  readonly joinWindow: number;

  private state = new Map<string, GuildState>();
  private enabling = new Set<string>();

  /**
   * @param options Optional overrides; `joinThreshold` defaults to 10 and
   *   `joinWindow` to 60 seconds.
   */
  constructor(options: RaidDetectorOptions = {}) {
    this.joinThreshold = options.joinThreshold ?? 10;
    this.joinWindow = (options.joinWindow ?? 60) * 1000;
  }

  /** Returns the guild's state record, creating an empty one on first use. */
  private getState(guildId: string): GuildState {
    let state = this.state.get(guildId);
    if (state === undefined) {
      state = { joins: [], raidActive: false };
      this.state.set(guildId, state);
    }
    return state;
  }

  /** Drops joins whose age is equal to or greater than the window. */
  private cleanupJoins(state: GuildState, now: number): void {
    state.joins = state.joins.filter(j => now - j.timestamp < this.joinWindow);
  }

  /**
   * Record a join and report whether the guild has reached the raid
   * threshold inside the window. The join is stamped with the current
   * clock (`Date.now()`), not with a timestamp taken from `member`.
   * Flipping the guild into raid mode is a separate step (`tryEnable`).
   *
   * @param guildId Guild the member joined.
   * @param member The joining member; only `memberId` is stored.
   * @returns The raid signal, the join count in the window, and the
   *   window length in seconds.
   */
  track(guildId: string, member: MemberJoin): RaidTrackResult {
    const now = Date.now();
    const state = this.getState(guildId);
    this.cleanupJoins(state, now);
    state.joins.push({ memberId: member.memberId, timestamp: now });
    return {
      isRaid: state.joins.length >= this.joinThreshold,
      joinCount: state.joins.length,
      windowSeconds: this.joinWindow / 1000,
    };
  }

  /** Whether raid mode is currently on for the guild (false if unknown). */
  isActive(guildId: string): boolean {
    return this.state.get(guildId)?.raidActive ?? false;
  }

  /**
   * Attempt to flip the guild into raid-active state. Returns:
   *   - 'enabled'          if this call performed the transition
   *   - 'already_active'   if raid mode was already on
   *   - 'already_enabling' if the guild is marked as mid-transition
   *
   * The marker is added and removed within the same synchronous step, so
   * with the current body 'already_enabling' is not observable; the
   * result is kept so the return type stays stable for callers.
   */
  async tryEnable(guildId: string): Promise<EnableResult> {
    const state = this.getState(guildId);
    if (state.raidActive) return 'already_active';
    if (this.enabling.has(guildId)) return 'already_enabling';
    this.enabling.add(guildId);
    try {
      state.raidActive = true;
      return 'enabled';
    } finally {
      this.enabling.delete(guildId);
    }
  }

  /**
   * Turn raid mode off for the guild and clear any mid-transition marker.
   * Unknown guilds are left untouched rather than being given a state
   * record just to store `false`.
   */
  disable(guildId: string): void {
    const state = this.state.get(guildId);
    if (state !== undefined) {
      state.raidActive = false;
    }
    this.enabling.delete(guildId);
  }

  /**
   * Count the stored joins that fall inside a window ending now.
   *
   * Joins older than the configured window are discarded whenever `track`
   * runs, so a `windowSeconds` larger than the configured window cannot
   * see joins that were already pruned.
   *
   * @param guildId Guild to inspect; unknown guilds yield 0 and are not
   *   added to the internal state.
   * @param windowSeconds Window length in seconds; defaults to the
   *   configured window.
   */
  getJoinCount(guildId: string, windowSeconds?: number): number {
    const state = this.state.get(guildId);
    if (state === undefined) return 0;

    const now = Date.now();
    const window = (windowSeconds ?? this.joinWindow / 1000) * 1000;

    return state.joins.filter(j => now - j.timestamp < window).length;
  }
}
@@ -0,0 +1,2 @@
1
+ export * from './detector';
2
+ export * from './age';
@@ -0,0 +1,342 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { normalizeText } from '../text/normalize';
3
+
4
/**
 * Hex-encoded MD5 digest of `input`, used to fingerprint content.
 *
 * When a `Bun` global exposing `CryptoHasher` is present it is used;
 * otherwise the digest comes from `createHash` in node:crypto.
 */
const md5 = (input: string): string => {
  // @ts-expect-error — Bun global is not in node types
  const bunHasherAvailable = typeof Bun !== 'undefined' && Bun.CryptoHasher;

  if (!bunHasherAvailable) {
    const nodeHash = createHash('md5');
    nodeHash.update(input);
    return nodeHash.digest('hex');
  }

  // @ts-expect-error — same
  const bunHasher = new Bun.CryptoHasher('md5');
  bunHasher.update(input);
  return bunHasher.digest('hex');
};
20
+
21
/** Construction options for `SpamCache`. Omitted fields fall back to the defaults noted below. */
export interface SpamCacheOptions {
  /** Most messages allowed inside the rate-limit window. Default: 8. */
  rateLimit?: number;
  /** Length of the rate-limit window, in seconds. Default: 10. */
  rateLimitWindow?: number;
  /** Number of identical messages that raises a duplicate alert. Default: 3. */
  duplicateThreshold?: number;
  /** Length of the duplicate-detection window, in seconds. Default: 30. */
  duplicateWindow?: number;
  /** Timeout length applied during the day, in minutes. Default: 180. */
  timeoutDurationDay?: number;
  /** Hour of day (0-23) at which night-time timeouts begin. Default: 23. */
  nightStartHour?: number;
  /** Hour of day (0-23) at which night-time timeouts stop. Default: 11. */
  nightEndHour?: number;
  /** IANA timezone used for night detection. Default: 'America/Mexico_City'. */
  timezone?: string;
  /** Capacity for tracked users. Default: 10000. */
  maxUsers?: number;
}

/** What a single message looked like, as far as spam tracking is concerned. */
export interface SpamContent {
  text?: string;
  attachments?: { name: string; size: number }[];
  stickerIds?: string[];
  messageId?: string | null;
  channelId?: string | null;
}

/** Pointer to one message: its id and the channel it was posted in. */
export interface SpamMessageRef {
  messageId: string;
  channelId: string;
}

/** Why a message was classified as spam. */
export type SpamReason = 'rate_limit' | 'duplicate';

/** Verdict returned for one tracked message. */
export interface SpamResult {
  isSpam: boolean;
  reason: SpamReason | null;
  details: string | null;
  /** References to the messages that contributed to the spam trigger. */
  priorMessageIds?: SpamMessageRef[];
}

/** Snapshot of cache usage together with the effective configuration. */
export interface SpamCacheStats {
  trackedUsers: number;
  maxUsers: number;
  totalTimestamps: number;
  totalHashes: number;
  config: {
    rateLimit: number;
    rateLimitWindowSeconds: number;
    duplicateThreshold: number;
    duplicateWindowSeconds: number;
    timeoutDurationDayMinutes: number;
    nightHours: string;
    timezone: string;
    isNightTime: boolean;
    currentTimeoutMinutes: number;
  };
}

/** One message arrival recorded for rate limiting. */
interface TimestampEntry {
  time: number;
  channelId: string | null;
  messageId: string | null;
}

/** One content fingerprint recorded for duplicate detection. */
interface HashEntry {
  hash: string;
  timestamp: number;
  messageId: string | null;
  channelId: string | null;
}

/** Everything remembered about a single user. */
interface UserTracking {
  timestamps: TimestampEntry[];
  messageHashes: HashEntry[];
}
102
+
103
/**
 * Spam cache. Tracks, per user, recent message timestamps and content
 * fingerprints to detect three kinds of abuse:
 *
 *   - Rate limit: more than `rateLimit` messages inside the rate-limit window
 *   - Cross-channel: messages in three or more distinct channels inside that
 *     same window (reported with reason `'rate_limit'`)
 *   - Duplicate: the same content sent `duplicateThreshold` times inside the
 *     duplicate window
 *
 * At most `maxUsers` users are tracked; when a new user would exceed that,
 * the least recently tracked user is evicted. A user's expired entries are
 * pruned only when that user is tracked again, so consumers are responsible
 * for calling `cleanup()` periodically to evict idle users.
 */
export class SpamCache {
  readonly rateLimit: number;
  /** Rate-limit window in milliseconds (the option is given in seconds). */
  readonly rateLimitWindow: number;
  readonly duplicateThreshold: number;
  /** Duplicate window in milliseconds (the option is given in seconds). */
  readonly duplicateWindow: number;
  /** Daytime timeout in minutes. */
  readonly timeoutDurationDay: number;
  readonly nightStartHour: number;
  readonly nightEndHour: number;
  readonly timezone: string;
  readonly maxUsers: number;

  // Map iteration order doubles as the recency order: oldest entry first.
  private userTracking = new Map<string, UserTracking>();

  constructor(options: SpamCacheOptions = {}) {
    this.rateLimit = options.rateLimit ?? 8;
    this.rateLimitWindow = (options.rateLimitWindow ?? 10) * 1000;
    this.duplicateThreshold = options.duplicateThreshold ?? 3;
    this.duplicateWindow = (options.duplicateWindow ?? 30) * 1000;
    this.timeoutDurationDay = options.timeoutDurationDay ?? 180;
    this.nightStartHour = options.nightStartHour ?? 23;
    this.nightEndHour = options.nightEndHour ?? 11;
    this.timezone = options.timezone ?? 'America/Mexico_City';
    this.maxUsers = options.maxUsers ?? 10000;
  }

  /** MD5 of the normalized text, so trivially altered copies hash alike. */
  private hashContent(content: string): string {
    return md5(normalizeText(content));
  }

  /**
   * Builds a fingerprint from whichever of text, attachments and stickers
   * are present. Attachments are identified by name and size only, and both
   * attachments and stickers are sorted so ordering does not matter. A
   * message with none of the three gets a time-based `empty:` id.
   */
  private generateContentId(options: SpamContent): string {
    const { text, attachments = [], stickerIds = [] } = options;
    const parts: string[] = [];

    if (text && text.trim()) {
      parts.push(`text:${this.hashContent(text)}`);
    }

    if (attachments.length > 0) {
      const fingerprints = attachments.map(a => `${a.name}:${a.size}`).sort();
      parts.push(`attachments:${this.hashContent(fingerprints.join('|'))}`);
    }

    if (stickerIds.length > 0) {
      const sorted = [...stickerIds].sort();
      parts.push(`stickers:${sorted.join(',')}`);
    }

    if (parts.length === 0) {
      return `empty:${Date.now()}`;
    }

    return parts.join('::');
  }

  /**
   * Returns the user's tracking record, creating it if needed.
   *
   * A known user is re-inserted at the end of the map so that the first
   * key is always the least recently tracked user; that key is the one
   * evicted when a new user arrives at capacity.
   */
  private getTracking(userId: string): UserTracking {
    const existing = this.userTracking.get(userId);

    if (existing !== undefined) {
      this.userTracking.delete(userId);
      this.userTracking.set(userId, existing);
      return existing;
    }

    if (this.userTracking.size >= this.maxUsers) {
      const oldest = this.userTracking.keys().next().value;

      if (oldest !== undefined) {
        this.userTracking.delete(oldest);
      }
    }

    const created: UserTracking = { timestamps: [], messageHashes: [] };
    this.userTracking.set(userId, created);

    return created;
  }

  /** Drops timestamps and hashes that have aged out of their windows. */
  private cleanupTracking(tracking: UserTracking, now: number): void {
    tracking.timestamps = tracking.timestamps.filter(e => now - e.time < this.rateLimitWindow);
    tracking.messageHashes = tracking.messageHashes.filter(e => now - e.timestamp < this.duplicateWindow);
  }

  /**
   * Record one message for `userId` and classify it.
   *
   * Checks run in this order and the first hit wins: rate limit,
   * cross-channel, duplicate. The content fingerprint is only recorded when
   * neither of the first two checks fires.
   *
   * @param userId User who sent the message.
   * @param content The message's text, attachments, stickers and ids.
   * @returns The verdict; for spam, `priorMessageIds` lists the tracked
   *   messages that have both a message id and a channel id.
   */
  track(userId: string, content: SpamContent): SpamResult {
    const now = Date.now();
    const tracking = this.getTracking(userId);

    this.cleanupTracking(tracking, now);

    const messageId = content.messageId ?? null;
    const channelId = content.channelId ?? null;
    const contentId = this.generateContentId(content);

    tracking.timestamps.push({ time: now, channelId, messageId });

    const collectPriorMessageIds = (): SpamMessageRef[] =>
      tracking.timestamps
        .filter(t => t.messageId && t.channelId)
        .map(t => ({ messageId: t.messageId!, channelId: t.channelId! }));

    if (tracking.timestamps.length > this.rateLimit) {
      return {
        isSpam: true,
        reason: 'rate_limit',
        details: `Sent ${tracking.timestamps.length} messages in ${this.rateLimitWindow / 1000} seconds (limit: ${this.rateLimit})`,
        priorMessageIds: collectPriorMessageIds(),
      };
    }

    // Messages without a channel id do not count towards channel hopping.
    const uniqueChannels = new Set(tracking.timestamps.map(t => t.channelId).filter(Boolean)).size;
    if (uniqueChannels >= 3) {
      return {
        isSpam: true,
        reason: 'rate_limit',
        details: `Cross-channel spam detected: Posted in ${uniqueChannels} channels in ${this.rateLimitWindow / 1000} seconds`,
        priorMessageIds: collectPriorMessageIds(),
      };
    }

    // Content-less messages are recorded but never compared: their id is
    // time-based and therefore not a meaningful duplicate key.
    const comparable = !contentId.startsWith('empty:');
    const duplicates = comparable ? tracking.messageHashes.filter(e => e.hash === contentId) : [];

    tracking.messageHashes.push({ hash: contentId, timestamp: now, messageId, channelId });

    // `duplicates` holds earlier copies only, hence the "- 1".
    if (comparable && duplicates.length >= this.duplicateThreshold - 1) {
      const priorMessageIds = duplicates
        .filter(e => e.messageId && e.channelId)
        .map(e => ({ messageId: e.messageId!, channelId: e.channelId! }));

      return {
        isSpam: true,
        reason: 'duplicate',
        details: `Sent the same content ${duplicates.length + 1} times in ${this.duplicateWindow / 1000} seconds (limit: ${this.duplicateThreshold})`,
        priorMessageIds,
      };
    }

    return { isSpam: false, reason: null, details: null };
  }

  /** Forget everything tracked for one user. */
  reset(userId: string): void {
    this.userTracking.delete(userId);
  }

  /** Forget every tracked user. */
  clear(): void {
    this.userTracking.clear();
  }

  /** Usage totals plus the effective configuration and current night state. */
  getStats(): SpamCacheStats {
    let totalTimestamps = 0;
    let totalHashes = 0;

    for (const tracking of this.userTracking.values()) {
      totalTimestamps += tracking.timestamps.length;
      totalHashes += tracking.messageHashes.length;
    }

    return {
      trackedUsers: this.userTracking.size,
      maxUsers: this.maxUsers,
      totalTimestamps,
      totalHashes,
      config: {
        rateLimit: this.rateLimit,
        rateLimitWindowSeconds: this.rateLimitWindow / 1000,
        duplicateThreshold: this.duplicateThreshold,
        duplicateWindowSeconds: this.duplicateWindow / 1000,
        timeoutDurationDayMinutes: this.timeoutDurationDay,
        nightHours: `${this.nightStartHour}:00 - ${this.nightEndHour}:00`,
        timezone: this.timezone,
        isNightTime: this.isNightTime(),
        currentTimeoutMinutes: this.getTimeoutDurationMinutes(),
      },
    };
  }

  /**
   * Current wall-clock hour (0-23) and minute (0-59) in the configured
   * timezone.
   *
   * The `h23` hour cycle is requested explicitly and the hour is reduced
   * modulo 24, because `hour12: false` on its own can render midnight as
   * "24" for the en-US locale.
   */
  private getCurrentTime(): { hour: number; minute: number } {
    const parts = new Intl.DateTimeFormat('en-US', {
      timeZone: this.timezone,
      hour: 'numeric',
      minute: 'numeric',
      hourCycle: 'h23',
    }).formatToParts(new Date());

    const read = (type: 'hour' | 'minute'): number => {
      const raw = parts.find(p => p.type === type)?.value ?? '0';
      return Number.parseInt(raw, 10);
    };

    return { hour: read('hour') % 24, minute: read('minute') };
  }

  /**
   * Whether the current hour falls inside the night window
   * `[nightStartHour, nightEndHour)`. A start later than the end means the
   * window wraps past midnight. Equal start and end never count as night.
   */
  isNightTime(): boolean {
    const { hour } = this.getCurrentTime();

    if (this.nightStartHour > this.nightEndHour) {
      return hour >= this.nightStartHour || hour < this.nightEndHour;
    }

    return hour >= this.nightStartHour && hour < this.nightEndHour;
  }

  /**
   * Minutes from now until `nightEndHour:00`, never less than 60.
   *
   * The end is taken to be on the next day only when the current hour is
   * at or past both bounds. That covers the evening part of a window that
   * wraps midnight without adding a day for a window that starts and ends
   * on the same day (e.g. 01:00-05:00 seen at 02:30 is 150 minutes).
   */
  getMinutesUntilNightEnd(): number {
    const { hour, minute } = this.getCurrentTime();
    const endsTomorrow = hour >= this.nightStartHour && hour >= this.nightEndHour;
    const hoursUntilEnd = endsTomorrow
      ? 24 - hour + this.nightEndHour
      : this.nightEndHour - hour;
    const totalMinutes = hoursUntilEnd * 60 - minute;

    return Math.max(totalMinutes, 60);
  }

  /** Timeout to apply right now: until night ends at night, else the daytime value. */
  getTimeoutDurationMinutes(): number {
    return this.isNightTime() ? this.getMinutesUntilNightEnd() : this.timeoutDurationDay;
  }

  /** Same as `getTimeoutDurationMinutes`, in milliseconds. */
  getTimeoutDurationMs(): number {
    return this.getTimeoutDurationMinutes() * 60 * 1000;
  }

  /**
   * Prune expired entries for every user and drop users left with nothing.
   *
   * @returns The number of users removed.
   */
  cleanup(): number {
    const now = Date.now();
    const toDelete: string[] = [];

    for (const [userId, tracking] of this.userTracking.entries()) {
      this.cleanupTracking(tracking, now);

      if (tracking.timestamps.length === 0 && tracking.messageHashes.length === 0) {
        toDelete.push(userId);
      }
    }

    toDelete.forEach(userId => this.userTracking.delete(userId));

    return toDelete.length;
  }
}
@@ -0,0 +1 @@
1
+ export * from './cache';
@@ -0,0 +1,2 @@
1
+ export * from './normalize';
2
+ export * from './mentions';
@@ -0,0 +1,91 @@
1
/** Limits applied when checking one message for mention spam. */
export interface MentionConfig {
  /** Most distinct user mentions a single message may contain. */
  maxUserMentions: number;
  /** Most distinct role mentions a single message may contain. */
  maxRoleMentions: number;
  /** Most mentions in total (users plus roles) a single message may contain. */
  maxTotalMentions: number;
  /** When true, an `@everyone` mention marks the sender's message as spam. */
  blockEveryone: boolean;
  /** When true, an `@here` mention marks the sender's message as spam. */
  blockHere: boolean;
}

/** Mention tallies for one message, as counted by the caller. */
export interface MentionCounts {
  userMentions: number;
  roleMentions: number;
  hasEveryone: boolean;
  hasHere: boolean;
}

/** Which mention rule a message broke. */
export type MentionSpamReason =
  | 'mention_everyone'
  | 'mention_here'
  | 'mention_users'
  | 'mention_roles'
  | 'mention_total';

/** Verdict of a mention-spam check. */
export interface MentionSpamResult {
  isSpam: boolean;
  reason: MentionSpamReason | null;
  details: string | null;
}
33
+
34
/** Limits used by `checkMentionSpam` when the caller does not pass its own. */
export const DEFAULT_MENTION_CONFIG: MentionConfig = {
  maxUserMentions: 5,
  maxRoleMentions: 3,
  maxTotalMentions: 8,
  blockEveryone: true,
  blockHere: true,
};

/**
 * Mention-spam check for a single message.
 *
 * Rules are evaluated in a fixed order and the first violated rule is the
 * one reported: `@everyone`, then `@here`, then the user-mention limit, the
 * role-mention limit, and finally the combined limit. A limit is violated
 * only when the count is strictly greater than it.
 *
 * @param counts Mention tallies for the message.
 * @param config Limits to enforce; defaults to `DEFAULT_MENTION_CONFIG`.
 * @returns The verdict, with `reason` and `details` set when `isSpam` is true.
 */
export function checkMentionSpam(
  counts: MentionCounts,
  config: MentionConfig = DEFAULT_MENTION_CONFIG,
): MentionSpamResult {
  if (config.blockEveryone && counts.hasEveryone) {
    return {
      isSpam: true,
      reason: 'mention_everyone',
      details: '@everyone mentioned without permission',
    };
  }

  if (config.blockHere && counts.hasHere) {
    return {
      isSpam: true,
      reason: 'mention_here',
      details: '@here mentioned without permission',
    };
  }

  if (counts.userMentions > config.maxUserMentions) {
    return {
      isSpam: true,
      reason: 'mention_users',
      details: `${counts.userMentions} user mentions (limit: ${config.maxUserMentions})`,
    };
  }

  if (counts.roleMentions > config.maxRoleMentions) {
    return {
      isSpam: true,
      reason: 'mention_roles',
      details: `${counts.roleMentions} role mentions (limit: ${config.maxRoleMentions})`,
    };
  }

  const total = counts.userMentions + counts.roleMentions;

  if (total > config.maxTotalMentions) {
    return {
      isSpam: true,
      reason: 'mention_total',
      details: `${total} total mentions (limit: ${config.maxTotalMentions})`,
    };
  }

  return { isSpam: false, reason: null, details: null };
}
@@ -0,0 +1,43 @@
1
/**
 * Invisible / zero-width Unicode code points that users sometimes insert
 * between letters to bypass substring-based filters. NFKC normalization
 * does NOT remove these on its own, so they are stripped explicitly.
 *
 *   - U+200B Zero-Width Space
 *   - U+200C Zero-Width Non-Joiner
 *   - U+200D Zero-Width Joiner
 *   - U+200E Left-to-Right Mark
 *   - U+200F Right-to-Left Mark
 *   - U+2060 Word Joiner
 *   - U+FEFF Zero-Width No-Break Space (BOM)
 */
// Matching individual zero-width code points by design (we are stripping
// them, not joining anything). ESLint's no-misleading-character-class
// flags this since \u200d is the Zero-Width Joiner; the warning does not
// apply here because every code point in the class is a literal target.
// eslint-disable-next-line no-misleading-character-class
const ZERO_WIDTH = /[\u200b\u200c\u200d\u200e\u200f\u2060\ufeff]/gu;

/**
 * Canonicalizes user-submitted text for content matching:
 *
 *   1. NFKC normalize (folds bold, italic, fullwidth, circled and other
 *      compatibility variants into their plain forms)
 *   2. strip zero-width / invisible characters
 *   3. lowercase
 *   4. collapse whitespace runs to single spaces
 *   5. trim surrounding whitespace
 *
 * The order matters. Lowercasing runs after NFKC because many
 * compatibility capitals (e.g. mathematical bold letters) have no
 * lowercase mapping of their own and only become ordinary capitals once
 * normalized. Trimming runs last so that whitespace exposed by removing a
 * leading or trailing invisible character is removed as well.
 *
 * Useful for spam hashing, ban-list matching, and any other comparison
 * where users should not be able to defeat a match with visually similar
 * but technically distinct input.
 *
 * @param input Raw user text.
 * @returns The canonical form of `input`.
 */
export function normalizeText(input: string): string {
  return input
    .normalize('NFKC')
    .replace(ZERO_WIDTH, '')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim();
}
@@ -0,0 +1,14 @@
1
+ import type { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
2
+
3
/**
 * Configuration object for the moderation package. Every section is
 * optional.
 */
export interface ModerationConfiguration {
  /** Client configuration for AWS Rekognition. */
  aws?: RekognitionClientConfig;

  /** Google credentials: an API key and/or a key file. */
  google?: {
    apiKey?: string;
    keyFile?: string;
  };

  /** OpenAI credentials. */
  openai?: {
    apiKey?: string;
  };

  /** NOTE(review): presumably terms to reject during text moderation — confirm against the client. */
  banList?: string[];

  /** NOTE(review): presumably URLs or domains to reject — confirm against the client. */
  urlBlackList?: string[];
}
@@ -1,19 +1,13 @@
1
- import { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
1
+ export * from './config';
2
2
 
3
- export interface ModerationConfiguration {
4
- aws?: RekognitionClientConfig;
5
- google?: {
6
- apiKey?: string;
7
- keyFile?: string;
8
- };
9
- banList?: string[];
10
- urlBlackList?: string[];
11
- }
3
+ export type Severity = 'low' | 'medium' | 'high' | 'critical';
12
4
 
13
5
  export interface ModerationCategory {
14
6
  category: string;
15
7
  confidence: number;
8
+ severity?: Severity;
16
9
  }
10
+
17
11
  export interface ModerationResult {
18
12
  source: string;
19
13
  moderation: ModerationCategory[];
@@ -29,4 +23,4 @@ export interface ThreatsResponse {
29
23
  threatTypes: string[];
30
24
  expireTime: string;
31
25
  };
32
- }
26
+ }