@joliegg/moderation 0.6.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/actions.d.ts +28 -0
  2. package/dist/actions.js +48 -0
  3. package/dist/audit/emitter.d.ts +26 -0
  4. package/dist/audit/emitter.js +63 -0
  5. package/dist/audit/events.d.ts +75 -0
  6. package/dist/audit/events.js +2 -0
  7. package/dist/audit/index.d.ts +2 -0
  8. package/dist/audit/index.js +18 -0
  9. package/dist/client.d.ts +22 -0
  10. package/dist/client.js +107 -0
  11. package/dist/index.d.ts +3 -41
  12. package/dist/index.js +20 -213
  13. package/dist/providers/aws.d.ts +11 -0
  14. package/dist/providers/aws.js +58 -0
  15. package/dist/providers/google.d.ts +21 -0
  16. package/dist/providers/google.js +61 -0
  17. package/dist/providers/openai.d.ts +15 -0
  18. package/dist/providers/openai.js +54 -0
  19. package/dist/providers/webrisk.d.ts +9 -0
  20. package/dist/providers/webrisk.js +33 -0
  21. package/dist/raid/age.d.ts +6 -0
  22. package/dist/raid/age.js +19 -0
  23. package/dist/raid/detector.d.ts +56 -0
  24. package/dist/raid/detector.js +90 -0
  25. package/dist/raid/index.d.ts +2 -0
  26. package/dist/raid/index.js +18 -0
  27. package/dist/rubrics/defaults.d.ts +19 -0
  28. package/dist/rubrics/defaults.js +32 -0
  29. package/dist/rubrics/index.d.ts +3 -0
  30. package/dist/rubrics/index.js +19 -0
  31. package/dist/rubrics/rubric.d.ts +21 -0
  32. package/dist/rubrics/rubric.js +57 -0
  33. package/dist/rubrics/types.d.ts +27 -0
  34. package/dist/rubrics/types.js +2 -0
  35. package/dist/spam/cache.d.ts +99 -0
  36. package/dist/spam/cache.js +210 -0
  37. package/dist/spam/index.d.ts +1 -0
  38. package/dist/spam/index.js +17 -0
  39. package/dist/text/index.d.ts +2 -0
  40. package/dist/text/index.js +18 -0
  41. package/dist/text/mentions.d.ts +31 -0
  42. package/dist/text/mentions.js +55 -0
  43. package/dist/text/normalize.d.ts +15 -0
  44. package/dist/text/normalize.js +45 -0
  45. package/dist/types/config.d.ts +13 -0
  46. package/dist/types/config.js +2 -0
  47. package/dist/types/index.d.ts +3 -10
  48. package/dist/types/index.js +15 -0
  49. package/package.json +66 -13
  50. package/src/actions.ts +50 -0
  51. package/src/audit/emitter.ts +77 -0
  52. package/src/audit/events.ts +89 -0
  53. package/src/audit/index.ts +2 -0
  54. package/src/client.ts +137 -0
  55. package/src/index.ts +3 -277
  56. package/src/providers/aws.ts +58 -0
  57. package/src/providers/google.ts +63 -0
  58. package/src/providers/openai.ts +64 -0
  59. package/src/providers/webrisk.ts +30 -0
  60. package/src/raid/age.ts +19 -0
  61. package/src/raid/detector.ts +133 -0
  62. package/src/raid/index.ts +2 -0
  63. package/src/rubrics/defaults.ts +32 -0
  64. package/src/rubrics/index.ts +3 -0
  65. package/src/rubrics/rubric.ts +62 -0
  66. package/src/rubrics/types.ts +30 -0
  67. package/src/spam/cache.ts +342 -0
  68. package/src/spam/index.ts +1 -0
  69. package/src/text/index.ts +2 -0
  70. package/src/text/mentions.ts +91 -0
  71. package/src/text/normalize.ts +43 -0
  72. package/src/types/config.ts +14 -0
  73. package/src/types/index.ts +5 -11
  74. /package/dist/{url-blacklist.json → data/url-blacklist.json} +0 -0
  75. /package/dist/{url-shorteners.json → data/url-shorteners.json} +0 -0
  76. /package/src/{url-blacklist.json → data/url-blacklist.json} +0 -0
  77. /package/src/{url-shorteners.json → data/url-shorteners.json} +0 -0
@@ -0,0 +1,62 @@
1
+ import type { ModerationCategory, Severity } from '../types';
2
+ import type { RubricResult, RubricRule } from './types';
3
+
4
/** Numeric rank for each severity so rules can be compared; higher is more severe. */
const SEVERITY_ORDER: Record<Severity, number> = {
  low: 0,
  medium: 1,
  high: 2,
  critical: 3,
};

/**
 * Composable classifier-to-action mapping.
 *
 * Callers hand a `ScoringRubric` their classifier output
 * (`ModerationCategory[]`) and get back a decision: the recommended
 * action, the aggregate severity, and the categories that contributed.
 *
 * Apps can use the ship-with defaults (`STRICT_RUBRIC`,
 * `PERMISSIVE_RUBRIC`, `NSFW_ONLY_RUBRIC`) or pass custom rules.
 *
 * Categories are visited in input order and, for each category, rules are
 * tried in input order. The highest-severity matching rule wins the
 * recommended action; on a severity tie the first match encountered wins.
 */
export class ScoringRubric {
  constructor(private rules: RubricRule[]) {}

  /**
   * Evaluates classifier output against the configured rules.
   *
   * @param categories Classifier output to score.
   * @returns The winning action and severity (`null` / `'low'` when nothing
   *   matched), each matching category exactly once, and one human-readable
   *   reason per (category, rule) match.
   */
  evaluate(categories: ModerationCategory[]): RubricResult {
    const matched: ModerationCategory[] = [];
    const reasons: string[] = [];

    let best: RubricRule | null = null;

    for (const category of categories) {
      let categoryMatched = false;

      for (const rule of this.rules) {
        const { category: wanted, minConfidence } = rule.match;

        // An omitted category is a wildcard; otherwise substring match.
        if (wanted !== undefined && !category.category.includes(wanted)) {
          continue;
        }

        // Written as a negated `>=` so a NaN confidence never fires a rule
        // (`NaN < min` is false and would otherwise slip through).
        if (!(category.confidence >= minConfidence)) {
          continue;
        }

        categoryMatched = true;
        reasons.push(`${category.category} (${category.confidence.toFixed(0)}%) >= ${minConfidence}% → ${rule.action}`);

        if (!best || SEVERITY_ORDER[rule.severity] > SEVERITY_ORDER[best.severity]) {
          best = rule;
        }
      }

      // Record the category once even when several rules matched it.
      if (categoryMatched) {
        matched.push(category);
      }
    }

    return {
      action: best?.action ?? null,
      severity: best?.severity ?? 'low',
      matched,
      reasons,
    };
  }
}
@@ -0,0 +1,30 @@
1
+ import type { ModerationCategory, Severity } from '../types';
2
+ import type { ActionType } from '../actions';
3
+
4
+ export interface RubricMatch { // Condition a classifier category must satisfy for a rule to fire.
5
+ /**
6
+ * Category name to match. Omit for a wildcard that fires on any category.
7
+ * Matched with substring `includes`, so a rule for `sexual` also fires
8
+ * on compound categories like `sexual/minors`.
9
+ */
10
+ category?: string;
11
+ /** Minimum confidence (0-100) required for this rule to fire; a confidence equal to this value still matches. */
12
+ minConfidence: number;
13
+ }
14
+
15
+ export interface RubricRule { // Pairs a match condition with the action and severity reported when it fires.
16
+ match: RubricMatch; // Condition a classifier category must meet for this rule to fire.
17
+ action: ActionType; // Action reported in the result when this rule is the winning match.
18
+ severity: Severity; // Rank used to pick the winner among matched rules; the highest severity wins.
19
+ }
20
+
21
+ export interface RubricResult { // Decision returned by `ScoringRubric.evaluate`.
22
+ /** The action the highest-severity matched rule recommends, or `null` when no rule matched. */
23
+ action: ActionType | null;
24
+ /** Aggregate severity — matches the winning rule, or `'low'` if nothing matched. */
25
+ severity: Severity;
26
+ /** Every category that matched at least one rule. */
27
+ matched: ModerationCategory[];
28
+ /** Human-readable explanations — one per matched rule, naming the category, its confidence, the threshold and the action. */
29
+ reasons: string[];
30
+ }
@@ -0,0 +1,342 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { normalizeText } from '../text/normalize';
3
+
4
/**
 * MD5 hash for content fingerprinting.
 *
 * Prefers Bun.CryptoHasher when available, falls back to node:crypto.
 *
 * @param input Text to hash.
 * @returns Hex-encoded MD5 digest of `input`.
 */
const md5 = (input: string): string => {
  // Read `Bun` off `globalThis` with a local structural type instead of a bare
  // identifier behind `@ts-expect-error`: that directive becomes a compile
  // error ("unused directive") as soon as Bun's typings are installed.
  const bun = (globalThis as {
    Bun?: {
      CryptoHasher?: new (algorithm: string) => {
        update(data: string): unknown;
        digest(encoding: 'hex'): string;
      };
    };
  }).Bun;

  if (bun?.CryptoHasher) {
    const hasher = new bun.CryptoHasher('md5');
    hasher.update(input);
    return hasher.digest('hex');
  }

  return createHash('md5').update(input).digest('hex');
};
20
+
21
+ export interface SpamCacheOptions { // Constructor options for `SpamCache`; each omitted field falls back to the default noted on it.
22
+ /** Maximum messages allowed within the rate-limit window. Default 8. */
23
+ rateLimit?: number;
24
+ /** Rate-limit window in seconds (stored internally in milliseconds). Default 10. */
25
+ rateLimitWindow?: number;
26
+ /** How many identical messages trigger a duplicate alert. Default 3. */
27
+ duplicateThreshold?: number;
28
+ /** Duplicate-detection window in seconds (stored internally in milliseconds). Default 30. */
29
+ duplicateWindow?: number;
30
+ /** Daytime timeout duration in minutes. Default 180. */
31
+ timeoutDurationDay?: number;
32
+ /** Hour of day (0-23) when nighttime timeouts start. Default 23. */
33
+ nightStartHour?: number;
34
+ /** Hour of day (0-23) when nighttime timeouts end. Default 11. */
35
+ nightEndHour?: number;
36
+ /** IANA timezone for night detection. Default 'America/Mexico_City'. */
37
+ timezone?: string;
38
+ /** Maximum number of users tracked at once; an existing entry is evicted to make room for a new user. Default 10000. */
39
+ maxUsers?: number;
40
+ }
41
+
42
+ export interface SpamContent { // One message's content and identifiers, as passed to `SpamCache.track`.
43
+ text?: string; // Message text; skipped for fingerprinting when empty or whitespace-only.
44
+ attachments?: { name: string; size: number }[]; // Attachments, fingerprinted by name and size only.
45
+ stickerIds?: string[]; // Sticker ids; sorted before fingerprinting, so order does not matter.
46
+ messageId?: string | null; // Message id echoed back in `priorMessageIds`; treated as null when omitted.
47
+ channelId?: string | null; // Channel id used for the cross-channel check; treated as null when omitted.
48
+ }
49
+
50
+ export interface SpamMessageRef { // Reference to a tracked message; only built for entries that have both ids.
51
+ messageId: string; // Id of the referenced message.
52
+ channelId: string; // Id of the channel the message was recorded with.
53
+ }
54
+
55
+ export type SpamReason = 'rate_limit' | 'duplicate'; // Why a message was flagged; cross-channel detections are also reported as 'rate_limit'.
56
+
57
+ export interface SpamResult { // Verdict returned by `SpamCache.track` for a single message.
58
+ isSpam: boolean; // True when the message tripped the rate-limit, cross-channel or duplicate check.
59
+ reason: SpamReason | null; // Which check fired; null when the message is not spam.
60
+ details: string | null; // Human-readable description of the trigger; null when the message is not spam.
61
+ /**
62
+ * Message references that contributed to the spam trigger. Only entries
63
+ * recorded with both a message id and a channel id are included; the field is absent when `isSpam` is false.
64
+ */
64
+ priorMessageIds?: SpamMessageRef[];
65
+ }
66
+
67
+ export interface SpamCacheStats { // Snapshot returned by `SpamCache.getStats`.
68
+ trackedUsers: number; // Number of users currently held in the cache.
69
+ maxUsers: number; // Configured cap on tracked users.
70
+ totalTimestamps: number; // Sum of stored message timestamps across all tracked users.
71
+ totalHashes: number; // Sum of stored content hashes across all tracked users.
72
+ config: { // Effective configuration plus values derived from the current time.
73
+ rateLimit: number; // Maximum messages allowed per rate-limit window.
74
+ rateLimitWindowSeconds: number; // Rate-limit window converted back to seconds.
75
+ duplicateThreshold: number; // Identical messages needed to flag a duplicate.
76
+ duplicateWindowSeconds: number; // Duplicate-detection window converted back to seconds.
77
+ timeoutDurationDayMinutes: number; // Daytime timeout in minutes.
78
+ nightHours: string; // Night window formatted as "<start>:00 - <end>:00".
79
+ timezone: string; // IANA timezone used for night detection.
80
+ isNightTime: boolean; // Whether the night window was active when the stats were taken.
81
+ currentTimeoutMinutes: number; // Timeout that would apply when the stats were taken.
82
+ };
83
+ }
84
+
85
+ interface TimestampEntry { // One tracked message, kept for rate-limit and cross-channel checks.
86
+ time: number; // `Date.now()` value (milliseconds) captured when the message was tracked.
87
+ channelId: string | null; // Channel the message was posted in, or null if not supplied.
88
+ messageId: string | null; // Message id, or null if not supplied.
89
+ }
90
+
91
+ interface HashEntry { // One tracked content fingerprint, kept for duplicate detection.
92
+ hash: string; // Content id built from the message's text, attachments and stickers.
93
+ timestamp: number; // `Date.now()` value (milliseconds) captured when the message was tracked.
94
+ messageId: string | null; // Message id, or null if not supplied.
95
+ channelId: string | null; // Channel id, or null if not supplied.
96
+ }
97
+
98
+ interface UserTracking { // Per-user state held by `SpamCache`.
99
+ timestamps: TimestampEntry[]; // Recent messages; entries older than the rate-limit window are pruned.
100
+ messageHashes: HashEntry[]; // Recent content fingerprints; entries older than the duplicate window are pruned.
101
+ }
102
+
103
/**
 * Spam Cache. Tracks per-user message timestamps and content hashes to detect
 * three kinds of abuse:
 *
 * - Rate limit: too many messages in a rolling window
 * - Duplicate: the same content repeated across messages
 * - Cross-channel: the same user hopping channels in quick succession
 *   (reported with reason `'rate_limit'`)
 *
 * At most `maxUsers` users are tracked; when the cache is full the least
 * recently seen user is evicted.
 *
 * Consumers are responsible for calling `cleanup()` periodically to
 * evict expired entries.
 */
export class SpamCache {
  readonly rateLimit: number;
  /** Rate-limit window in milliseconds. */
  readonly rateLimitWindow: number;
  readonly duplicateThreshold: number;
  /** Duplicate-detection window in milliseconds. */
  readonly duplicateWindow: number;
  /** Daytime timeout in minutes. */
  readonly timeoutDurationDay: number;
  readonly nightStartHour: number;
  readonly nightEndHour: number;
  readonly timezone: string;
  readonly maxUsers: number;

  private userTracking = new Map<string, UserTracking>();

  // Created lazily so an invalid timezone still only throws when the clock
  // is first read, not at construction.
  private clockFormatter: Intl.DateTimeFormat | null = null;

  /**
   * @param options Overrides for the defaults; window options are given in
   *   seconds and stored in milliseconds.
   */
  constructor(options: SpamCacheOptions = {}) {
    this.rateLimit = options.rateLimit ?? 8;
    this.rateLimitWindow = (options.rateLimitWindow ?? 10) * 1000;
    this.duplicateThreshold = options.duplicateThreshold ?? 3;
    this.duplicateWindow = (options.duplicateWindow ?? 30) * 1000;
    this.timeoutDurationDay = options.timeoutDurationDay ?? 180;
    this.nightStartHour = options.nightStartHour ?? 23;
    this.nightEndHour = options.nightEndHour ?? 11;
    this.timezone = options.timezone ?? 'America/Mexico_City';
    this.maxUsers = options.maxUsers ?? 10000;
  }

  /** Hashes text after normalization so visually equivalent content collides. */
  private hashContent(content: string): string {
    return md5(normalizeText(content));
  }

  /**
   * Builds a fingerprint from text, attachments (name and size) and sticker
   * ids. Content with none of these gets a unique `empty:<timestamp>` id so
   * it never counts as a duplicate.
   */
  private generateContentId(options: SpamContent): string {
    const { text, attachments = [], stickerIds = [] } = options;
    const parts: string[] = [];

    if (text && text.trim()) {
      parts.push(`text:${this.hashContent(text)}`);
    }

    if (attachments.length > 0) {
      const fingerprints = attachments.map(a => `${a.name}:${a.size}`).sort();
      parts.push(`attachments:${this.hashContent(fingerprints.join('|'))}`);
    }

    if (stickerIds.length > 0) {
      const sorted = [...stickerIds].sort();
      parts.push(`stickers:${sorted.join(',')}`);
    }

    if (parts.length === 0) {
      return `empty:${Date.now()}`;
    }

    return parts.join('::');
  }

  /**
   * Returns the tracking record for a user, creating it if needed.
   *
   * A Map iterates in insertion order, so re-inserting on every access keeps
   * the least recently seen user first; that is the entry evicted at capacity.
   */
  private getTracking(userId: string): UserTracking {
    const existing = this.userTracking.get(userId);

    if (existing !== undefined) {
      // Move to the most-recently-used position.
      this.userTracking.delete(userId);
      this.userTracking.set(userId, existing);
      return existing;
    }

    if (this.userTracking.size >= this.maxUsers) {
      const oldestKey = this.userTracking.keys().next().value;

      if (oldestKey !== undefined) {
        this.userTracking.delete(oldestKey);
      }
    }

    const created: UserTracking = { timestamps: [], messageHashes: [] };
    this.userTracking.set(userId, created);

    return created;
  }

  /** Drops entries that have aged out of their respective windows. */
  private cleanupTracking(tracking: UserTracking, now: number): void {
    tracking.timestamps = tracking.timestamps.filter(e => now - e.time < this.rateLimitWindow);
    tracking.messageHashes = tracking.messageHashes.filter(e => now - e.timestamp < this.duplicateWindow);
  }

  /**
   * Records a message and reports whether it trips a spam check.
   *
   * Checks run in order: rate limit, cross-channel (3 or more distinct
   * channels inside the rate-limit window), then duplicate content. The
   * first check that fires determines the result.
   *
   * @param userId Author of the message.
   * @param content Message content and identifiers.
   * @returns The verdict; `priorMessageIds` is set only when spam was detected.
   */
  track(userId: string, content: SpamContent): SpamResult {
    const now = Date.now();
    const tracking = this.getTracking(userId);

    this.cleanupTracking(tracking, now);

    const messageId = content.messageId ?? null;
    const channelId = content.channelId ?? null;
    const contentId = this.generateContentId(content);

    tracking.timestamps.push({ time: now, channelId, messageId });

    // Only entries carrying both ids can be turned into references.
    const collectPriorMessageIds = (): SpamMessageRef[] =>
      tracking.timestamps.flatMap(t =>
        t.messageId && t.channelId ? [{ messageId: t.messageId, channelId: t.channelId }] : []
      );

    if (tracking.timestamps.length > this.rateLimit) {
      return {
        isSpam: true,
        reason: 'rate_limit',
        details: `Sent ${tracking.timestamps.length} messages in ${this.rateLimitWindow / 1000} seconds (limit: ${this.rateLimit})`,
        priorMessageIds: collectPriorMessageIds(),
      };
    }

    const uniqueChannels = new Set(tracking.timestamps.map(t => t.channelId).filter(Boolean)).size;
    if (uniqueChannels >= 3) {
      return {
        isSpam: true,
        reason: 'rate_limit',
        details: `Cross-channel spam detected: Posted in ${uniqueChannels} channels in ${this.rateLimitWindow / 1000} seconds`,
        priorMessageIds: collectPriorMessageIds(),
      };
    }

    if (!contentId.startsWith('empty:')) {
      // Count earlier occurrences before recording the current message.
      const duplicates = tracking.messageHashes.filter(e => e.hash === contentId);
      const duplicateCount = duplicates.length;

      tracking.messageHashes.push({ hash: contentId, timestamp: now, messageId, channelId });

      if (duplicateCount >= this.duplicateThreshold - 1) {
        const priorMessageIds = duplicates.flatMap(e =>
          e.messageId && e.channelId ? [{ messageId: e.messageId, channelId: e.channelId }] : []
        );

        return {
          isSpam: true,
          reason: 'duplicate',
          details: `Sent the same content ${duplicateCount + 1} times in ${this.duplicateWindow / 1000} seconds (limit: ${this.duplicateThreshold})`,
          priorMessageIds,
        };
      }
    } else {
      tracking.messageHashes.push({ hash: contentId, timestamp: now, messageId, channelId });
    }

    return { isSpam: false, reason: null, details: null };
  }

  /** Forgets everything tracked for one user. */
  reset(userId: string): void {
    this.userTracking.delete(userId);
  }

  /** Forgets every tracked user. */
  clear(): void {
    this.userTracking.clear();
  }

  /** Returns current cache sizes together with the effective configuration. */
  getStats(): SpamCacheStats {
    let totalTimestamps = 0;
    let totalHashes = 0;

    for (const tracking of this.userTracking.values()) {
      totalTimestamps += tracking.timestamps.length;
      totalHashes += tracking.messageHashes.length;
    }

    return {
      trackedUsers: this.userTracking.size,
      maxUsers: this.maxUsers,
      totalTimestamps,
      totalHashes,
      config: {
        rateLimit: this.rateLimit,
        rateLimitWindowSeconds: this.rateLimitWindow / 1000,
        duplicateThreshold: this.duplicateThreshold,
        duplicateWindowSeconds: this.duplicateWindow / 1000,
        timeoutDurationDayMinutes: this.timeoutDurationDay,
        nightHours: `${this.nightStartHour}:00 - ${this.nightEndHour}:00`,
        timezone: this.timezone,
        isNightTime: this.isNightTime(),
        currentTimeoutMinutes: this.getTimeoutDurationMinutes(),
      },
    };
  }

  /**
   * Current wall-clock hour (0-23) and minute (0-59) in the configured
   * timezone.
   */
  private getCurrentTime(): { hour: number; minute: number } {
    if (this.clockFormatter === null) {
      // `hourCycle: 'h23'` rather than `hour12: false`: the latter can format
      // midnight as "24" on some engines.
      this.clockFormatter = new Intl.DateTimeFormat('en-US', {
        timeZone: this.timezone,
        hour: 'numeric',
        minute: 'numeric',
        hourCycle: 'h23',
      });
    }

    let hour = 0;
    let minute = 0;

    for (const part of this.clockFormatter.formatToParts(new Date())) {
      if (part.type === 'hour') {
        // `% 24` is a belt-and-braces guard against a "24" hour.
        hour = parseInt(part.value, 10) % 24;
      } else if (part.type === 'minute') {
        minute = parseInt(part.value, 10);
      }
    }

    return { hour, minute };
  }

  /**
   * Whether the current hour falls inside the night window. Handles windows
   * that wrap past midnight (start > end) as well as same-day windows.
   */
  isNightTime(): boolean {
    const { hour } = this.getCurrentTime();

    if (this.nightStartHour > this.nightEndHour) {
      return hour >= this.nightStartHour || hour < this.nightEndHour;
    }

    return hour >= this.nightStartHour && hour < this.nightEndHour;
  }

  /**
   * Minutes until the next `nightEndHour:00` in the configured timezone,
   * never less than 60.
   *
   * Computed modulo 24 hours so it is correct for both wrapping and same-day
   * night windows.
   */
  getMinutesUntilNightEnd(): number {
    const { hour, minute } = this.getCurrentTime();
    const hoursUntilEnd = (this.nightEndHour - hour + 24) % 24;
    const totalMinutes = hoursUntilEnd * 60 - minute;

    return Math.max(totalMinutes, 60);
  }

  /** Timeout in minutes: until night end during the night, otherwise the daytime duration. */
  getTimeoutDurationMinutes(): number {
    return this.isNightTime() ? this.getMinutesUntilNightEnd() : this.timeoutDurationDay;
  }

  /** Same as `getTimeoutDurationMinutes`, in milliseconds. */
  getTimeoutDurationMs(): number {
    return this.getTimeoutDurationMinutes() * 60 * 1000;
  }

  /**
   * Prunes expired entries for every user and removes users left with no
   * data.
   *
   * @returns The number of users removed.
   */
  cleanup(): number {
    const now = Date.now();
    const toDelete: string[] = [];

    for (const [userId, tracking] of this.userTracking.entries()) {
      this.cleanupTracking(tracking, now);

      if (tracking.timestamps.length === 0 && tracking.messageHashes.length === 0) {
        toDelete.push(userId);
      }
    }

    toDelete.forEach(userId => this.userTracking.delete(userId));

    return toDelete.length;
  }
}
@@ -0,0 +1 @@
1
+ export * from './cache';
@@ -0,0 +1,2 @@
1
+ export * from './normalize';
2
+ export * from './mentions';
@@ -0,0 +1,91 @@
1
+ export interface MentionConfig { // Limits applied by `checkMentionSpam`; a count is flagged only when it exceeds its limit.
2
+ /** Maximum number of distinct user mentions per message. */
3
+ maxUserMentions: number;
4
+ /** Maximum number of distinct role mentions per message. */
5
+ maxRoleMentions: number;
6
+ /** Maximum number of total mentions (user + role) per message. */
7
+ maxTotalMentions: number;
8
+ /** Whether to treat `@everyone` as spam for the sender. */
9
+ blockEveryone: boolean;
10
+ /** Whether to treat `@here` as spam for the sender. */
11
+ blockHere: boolean;
12
+ }
13
+
14
+ export interface MentionCounts { // Mention tallies for one message, supplied by the caller to `checkMentionSpam`.
15
+ userMentions: number; // Number of user mentions; compared against `maxUserMentions`.
16
+ roleMentions: number; // Number of role mentions; compared against `maxRoleMentions`.
17
+ hasEveryone: boolean; // Whether the message mentions `@everyone`.
18
+ hasHere: boolean; // Whether the message mentions `@here`.
19
+ }
20
+
21
+ export type MentionSpamReason = // Which mention rule flagged the message, listed in the order `checkMentionSpam` tests them.
22
+ | 'mention_everyone'
23
+ | 'mention_here'
24
+ | 'mention_users'
25
+ | 'mention_roles'
26
+ | 'mention_total';
27
+
28
+ export interface MentionSpamResult { // Verdict returned by `checkMentionSpam`.
29
+ isSpam: boolean; // True when any mention rule was violated.
30
+ reason: MentionSpamReason | null; // The first rule that fired; null when the message is not spam.
31
+ details: string | null; // Human-readable description of the violation; null when the message is not spam.
32
+ }
33
+
34
/** Limits used by `checkMentionSpam` for any setting the caller does not supply. */
export const DEFAULT_MENTION_CONFIG: MentionConfig = {
  maxUserMentions: 5,
  maxRoleMentions: 3,
  maxTotalMentions: 8,
  blockEveryone: true,
  blockHere: true,
};

/**
 * Mention-spam check.
 *
 * Rules are tested in a fixed order and the first violation is returned:
 * `@everyone`, `@here`, user mentions, role mentions, then total mentions.
 * A count is a violation only when it exceeds its limit.
 *
 * @param counts Mention tallies for the message.
 * @param config Limits to apply. May be omitted or partial; any missing (or
 *   `undefined`) setting falls back to `DEFAULT_MENTION_CONFIG`.
 * @returns The verdict, with `reason` and `details` set to `null` when the
 *   message is not spam.
 */
export function checkMentionSpam(counts: MentionCounts, config: Partial<MentionConfig> = {}): MentionSpamResult {
  const blockEveryone = config.blockEveryone ?? DEFAULT_MENTION_CONFIG.blockEveryone;
  const blockHere = config.blockHere ?? DEFAULT_MENTION_CONFIG.blockHere;
  const maxUserMentions = config.maxUserMentions ?? DEFAULT_MENTION_CONFIG.maxUserMentions;
  const maxRoleMentions = config.maxRoleMentions ?? DEFAULT_MENTION_CONFIG.maxRoleMentions;
  const maxTotalMentions = config.maxTotalMentions ?? DEFAULT_MENTION_CONFIG.maxTotalMentions;

  if (blockEveryone && counts.hasEveryone) {
    return {
      isSpam: true,
      reason: 'mention_everyone',
      details: '@everyone mentioned without permission',
    };
  }

  if (blockHere && counts.hasHere) {
    return {
      isSpam: true,
      reason: 'mention_here',
      details: '@here mentioned without permission',
    };
  }

  if (counts.userMentions > maxUserMentions) {
    return {
      isSpam: true,
      reason: 'mention_users',
      details: `${counts.userMentions} user mentions (limit: ${maxUserMentions})`,
    };
  }

  if (counts.roleMentions > maxRoleMentions) {
    return {
      isSpam: true,
      reason: 'mention_roles',
      details: `${counts.roleMentions} role mentions (limit: ${maxRoleMentions})`,
    };
  }

  const total = counts.userMentions + counts.roleMentions;

  if (total > maxTotalMentions) {
    return {
      isSpam: true,
      reason: 'mention_total',
      details: `${total} total mentions (limit: ${maxTotalMentions})`,
    };
  }

  return { isSpam: false, reason: null, details: null };
}
@@ -0,0 +1,43 @@
1
/**
 * Invisible / zero-width Unicode code points that users sometimes insert
 * between letters to bypass substring-based filters. NFKC normalization
 * does NOT collapse these on its own, so we strip them explicitly before
 * normalizing.
 *
 * - U+200B Zero-Width Space
 * - U+200C Zero-Width Non-Joiner
 * - U+200D Zero-Width Joiner
 * - U+200E Left-to-Right Mark
 * - U+200F Right-to-Left Mark
 * - U+2060 Word Joiner
 * - U+FEFF Zero-Width No-Break Space (BOM)
 */
// Matching individual zero-width code points by design (we are stripping
// them, not joining anything). ESLint's no-misleading-character-class
// flags this since \u200d is the Zero-Width Joiner; the warning does not
// apply here because every code point in the class is a literal target.
// eslint-disable-next-line no-misleading-character-class
const ZERO_WIDTH = /[\u200b\u200c\u200d\u200e\u200f\u2060\ufeff]/gu;

/**
 * Canonicalizes user-submitted text for content matching:
 *
 *   1. strip zero-width / invisible characters
 *   2. NFKC normalize (collapses bold, italic, fullwidth, circled,
 *      small-caps, and other compatibility variants to ASCII)
 *   3. lowercase
 *   4. collapse internal whitespace runs to single spaces
 *   5. trim surrounding whitespace
 *
 * Lowercasing happens after NFKC because styled capitals (for example
 * mathematical bold letters) have no lowercase mapping of their own and only
 * become plain `A`-`Z` once normalized. Trimming happens last because
 * stripping and normalizing can expose whitespace at the edges.
 *
 * Useful for spam hashing, ban-list matching, and any other comparison
 * where users should not be able to defeat a match by visually similar
 * but technically distinct input.
 *
 * @param input Raw user-submitted text.
 * @returns The canonical form of `input`.
 */
export function normalizeText(input: string): string {
  return input
    .replace(ZERO_WIDTH, '')
    .normalize('NFKC')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim();
}
@@ -0,0 +1,14 @@
1
+ import type { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
2
+
3
+ export interface ModerationConfiguration { // Optional per-provider settings and lists for the moderation client.
4
+ aws?: RekognitionClientConfig; // Client configuration typed by `@aws-sdk/client-rekognition`.
5
+ google?: { // NOTE(review): presumably credentials for the Google provider — confirm against its usage.
6
+ apiKey?: string;
7
+ keyFile?: string;
8
+ };
9
+ openai?: { // NOTE(review): presumably credentials for the OpenAI provider — confirm against its usage.
10
+ apiKey?: string;
11
+ };
12
+ banList?: string[]; // NOTE(review): presumably banned terms — verify how the client matches them.
13
+ urlBlackList?: string[]; // NOTE(review): presumably blocked URLs or domains — verify against the client.
14
+ }
@@ -1,19 +1,13 @@
1
- import { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
1
+ export * from './config';
2
2
 
3
- export interface ModerationConfiguration {
4
- aws?: RekognitionClientConfig;
5
- google?: {
6
- apiKey?: string;
7
- keyFile?: string;
8
- };
9
- banList?: string[];
10
- urlBlackList?: string[];
11
- }
3
+ export type Severity = 'low' | 'medium' | 'high' | 'critical';
12
4
 
13
5
  export interface ModerationCategory {
14
6
  category: string;
15
7
  confidence: number;
8
+ severity?: Severity;
16
9
  }
10
+
17
11
  export interface ModerationResult {
18
12
  source: string;
19
13
  moderation: ModerationCategory[];
@@ -29,4 +23,4 @@ export interface ThreatsResponse {
29
23
  threatTypes: string[];
30
24
  expireTime: string;
31
25
  };
32
- }
26
+ }