@joliegg/moderation 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audit/emitter.d.ts +26 -0
- package/dist/audit/emitter.js +63 -0
- package/dist/audit/events.d.ts +75 -0
- package/dist/audit/events.js +2 -0
- package/dist/audit/index.d.ts +2 -0
- package/dist/audit/index.js +18 -0
- package/dist/client.d.ts +4 -1
- package/dist/client.js +15 -5
- package/dist/providers/openai.d.ts +15 -0
- package/dist/providers/openai.js +54 -0
- package/dist/raid/detector.js +4 -2
- package/dist/rubrics/defaults.d.ts +19 -0
- package/dist/rubrics/defaults.js +32 -0
- package/dist/rubrics/index.d.ts +3 -0
- package/dist/rubrics/index.js +19 -0
- package/dist/rubrics/rubric.d.ts +21 -0
- package/dist/rubrics/rubric.js +57 -0
- package/dist/rubrics/types.d.ts +27 -0
- package/dist/rubrics/types.js +2 -0
- package/package.json +13 -1
- package/src/audit/emitter.ts +77 -0
- package/src/audit/events.ts +89 -0
- package/src/audit/index.ts +2 -0
- package/src/client.ts +21 -5
- package/src/providers/openai.ts +64 -0
- package/src/raid/detector.ts +13 -2
- package/src/rubrics/defaults.ts +32 -0
- package/src/rubrics/index.ts +3 -0
- package/src/rubrics/rubric.ts +62 -0
- package/src/rubrics/types.ts +30 -0
package/dist/audit/emitter.d.ts
ADDED
@@ -0,0 +1,26 @@
+import type { AuditEvent, AuditEventKind, AuditEventMap } from './events';
+/**
+ * Subscriber handler type for a specific audit event kind. Sync or
+ * async; the emitter fire-and-forgets async handlers so slow
+ * persistence sinks don't block the enforcement path.
+ */
+export type AuditHandler<K extends AuditEventKind> = (event: AuditEventMap[K]) => void | Promise<void>;
+/**
+ * Typed event emitter for moderation audit events.
+ *
+ * Subscribers register for specific event kinds with full type safety.
+ * Handlers that throw are isolated — a broken subscriber never blocks
+ * other subscribers or the emitter itself.
+ *
+ * Intentionally NOT `extends EventEmitter` from `node:events`: the
+ * typing story for Node's built-in emitter is painful, and the feature
+ * set we need (on / off / emit) is small enough to implement directly.
+ */
+export declare class AuditTrailEmitter {
+    private handlers;
+    on<K extends AuditEventKind>(kind: K, handler: AuditHandler<K>): this;
+    off<K extends AuditEventKind>(kind: K, handler: AuditHandler<K>): this;
+    emit(event: AuditEvent): void;
+    listenerCount(kind: AuditEventKind): number;
+    removeAllListeners(kind?: AuditEventKind): this;
+}
package/dist/audit/emitter.js
ADDED
@@ -0,0 +1,63 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.AuditTrailEmitter = void 0;
+/**
+ * Typed event emitter for moderation audit events.
+ *
+ * Subscribers register for specific event kinds with full type safety.
+ * Handlers that throw are isolated — a broken subscriber never blocks
+ * other subscribers or the emitter itself.
+ *
+ * Intentionally NOT `extends EventEmitter` from `node:events`: the
+ * typing story for Node's built-in emitter is painful, and the feature
+ * set we need (on / off / emit) is small enough to implement directly.
+ */
+class AuditTrailEmitter {
+    handlers = new Map();
+    on(kind, handler) {
+        if (!this.handlers.has(kind)) {
+            this.handlers.set(kind, new Set());
+        }
+        this.handlers.get(kind).add(handler);
+        return this;
+    }
+    off(kind, handler) {
+        this.handlers.get(kind)?.delete(handler);
+        return this;
+    }
+    emit(event) {
+        const handlers = this.handlers.get(event.kind);
+        if (!handlers) {
+            return;
+        }
+        for (const handler of handlers) {
+            try {
+                // Fire-and-forget. Async handlers are not awaited so the
+                // emitter's caller isn't blocked on slow persistence sinks.
+                // Subscribers own their own error handling for async work.
+                const result = handler(event);
+                if (result instanceof Promise) {
+                    result.catch(err => {
+                        console.error(`[Jolie::Moderation::AuditTrail] async handler error for ${event.kind}:`, err);
+                    });
+                }
+            }
+            catch (err) {
+                console.error(`[Jolie::Moderation::AuditTrail] handler error for ${event.kind}:`, err);
+            }
+        }
+    }
+    listenerCount(kind) {
+        return this.handlers.get(kind)?.size ?? 0;
+    }
+    removeAllListeners(kind) {
+        if (kind) {
+            this.handlers.delete(kind);
+        }
+        else {
+            this.handlers.clear();
+        }
+        return this;
+    }
+}
+exports.AuditTrailEmitter = AuditTrailEmitter;
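For orientation, a minimal subscription sketch (not part of the diff). It assumes the `@joliegg/moderation/audit` subpath export added to package.json later in this release, assumes the package root re-exports `ACTION_TYPES` from its actions module, and uses a hypothetical `persistAuditRow` sink:

    import { AuditTrailEmitter } from '@joliegg/moderation/audit';
    import { ACTION_TYPES } from '@joliegg/moderation'; // assumed re-export

    declare function persistAuditRow(event: unknown): Promise<void>; // hypothetical sink

    const audit = new AuditTrailEmitter();

    // The handler parameter is narrowed to ActionEvent by the 'action' key;
    // a rejection here is caught and logged by the emitter, never rethrown.
    audit.on('action', async (event) => {
      await persistAuditRow(event);
    });

    audit.emit({
      kind: 'action',
      guildId: '1024',              // placeholder IDs throughout
      platform: 'discord',
      timestamp: new Date(),
      actionType: ACTION_TYPES.DELETE,
      targetId: '2048',
      targetName: 'spammer#0001',
      moderatorId: 'system',
      moderatorName: 'AutoMod',
      reason: 'matched ban list',
    });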
package/dist/audit/events.d.ts
ADDED
@@ -0,0 +1,75 @@
+import type { ActionType } from '../actions';
+import type { SpamMessageRef, SpamReason } from '../spam/cache';
+export type Platform = 'discord' | 'twitch';
+/** Base fields shared by every audit event. */
+interface AuditEventBase {
+    guildId: string;
+    platform: Platform;
+    timestamp: Date;
+}
+/**
+ * A moderator (human or system) performed an enforcement action.
+ * This is the primary audit log entry.
+ */
+export interface ActionEvent extends AuditEventBase {
+    kind: 'action';
+    actionType: ActionType;
+    targetId: string;
+    targetName: string;
+    moderatorId: string;
+    moderatorName: string;
+    reason: string;
+    details?: string;
+    duration?: number;
+    channelId?: string;
+    messageId?: string;
+    evidence?: string[];
+}
+/**
+ * Automatic spam detection fired. Independent of whether enforcement
+ * succeeded — the detection itself is the audit event.
+ */
+export interface SpamDetectedEvent extends AuditEventBase {
+    kind: 'spam_detected';
+    userId: string;
+    reason: SpamReason;
+    details?: string;
+    priorMessageIds: SpamMessageRef[];
+    channelId?: string;
+}
+/** Raid mode detection crossed the join-count threshold. */
+export interface RaidDetectedEvent extends AuditEventBase {
+    kind: 'raid_detected';
+    joinCount: number;
+    windowSeconds: number;
+    autoTriggered: boolean;
+}
+/** A message was blocked by the per-user permissions check. */
+export interface PermissionBlockEvent extends AuditEventBase {
+    kind: 'permission_block';
+    userId: string;
+    violations: string[];
+    channelId?: string;
+    messageId?: string;
+}
+/** An appeal was reviewed (approved or denied). */
+export interface AppealReviewedEvent extends AuditEventBase {
+    kind: 'appeal_reviewed';
+    appealId: string;
+    userId: string;
+    reviewerId: string;
+    approved: boolean;
+    note?: string;
+}
+/** Union of every audit event shape. Add new kinds here. */
+export type AuditEvent = ActionEvent | SpamDetectedEvent | RaidDetectedEvent | PermissionBlockEvent | AppealReviewedEvent;
+/** Map from event kind → concrete event type. Used by the emitter. */
+export interface AuditEventMap {
+    action: ActionEvent;
+    spam_detected: SpamDetectedEvent;
+    raid_detected: RaidDetectedEvent;
+    permission_block: PermissionBlockEvent;
+    appeal_reviewed: AppealReviewedEvent;
+}
+export type AuditEventKind = keyof AuditEventMap;
+export {};
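Because `kind` is a literal discriminant on every member of `AuditEvent`, a plain switch narrows each branch to its concrete interface with no casts; a sketch:

    import type { AuditEvent } from '@joliegg/moderation/audit';

    function summarize(event: AuditEvent): string {
      switch (event.kind) {
        case 'action': // narrowed to ActionEvent here
          return `${event.moderatorName}: ${event.actionType} on ${event.targetName} (${event.reason})`;
        case 'spam_detected':
          return `spam from ${event.userId} (${event.reason}), ${event.priorMessageIds.length} prior messages`;
        case 'raid_detected':
          return `raid: ${event.joinCount} joins in ${event.windowSeconds}s`;
        case 'permission_block':
          return `blocked ${event.userId}: ${event.violations.join(', ')}`;
        case 'appeal_reviewed':
          return `appeal ${event.appealId} ${event.approved ? 'approved' : 'denied'} by ${event.reviewerId}`;
      }
    }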
package/dist/audit/index.js
ADDED
@@ -0,0 +1,18 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+__exportStar(require("./events"), exports);
+__exportStar(require("./emitter"), exports);
package/dist/client.d.ts
CHANGED
@@ -8,10 +8,13 @@ export declare class ModerationClient {
     private googleSpeech?;
     private aws?;
     private webRisk?;
+    private openai?;
     private banList;
     private urlBlackList;
     constructor(configuration: ModerationConfiguration);
-    moderateText(text: string, minimumConfidence?: number): Promise<ModerationResult>;
+    moderateText(text: string, minimumConfidence?: number, options?: {
+        provider?: 'google' | 'openai' | 'all';
+    }): Promise<ModerationResult>;
     moderateImage(url: string, minimumConfidence?: number): Promise<ModerationResult>;
     moderateLink(url: string, allowShorteners?: boolean): Promise<ModerationResult>;
     moderateAudio(url: string, language?: string, minimumConfidence?: number): Promise<ModerationResult>;
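A sketch of the new call shape. Only the configuration keys this diff actually checks (`openai.apiKey`, `banList`) are shown; the full `ModerationConfiguration` shape and the root `ModerationClient` export are assumed:

    import { ModerationClient } from '@joliegg/moderation'; // assumed root export

    async function demo() {
      const client = new ModerationClient({
        openai: { apiKey: process.env.OPENAI_API_KEY ?? '' },
        banList: ['badword'],
      });

      // Omitting options keeps the old behavior: provider defaults to 'google'.
      const viaOpenAI = await client.moderateText('some message', 50, { provider: 'openai' });

      // 'all' concatenates categories from every configured provider.
      const merged = await client.moderateText('some message', 50, { provider: 'all' });

      return { viaOpenAI, merged };
    }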
package/dist/client.js
CHANGED
@@ -10,6 +10,7 @@ const speech_1 = require("@google-cloud/speech");
 const google_1 = require("./providers/google");
 const aws_1 = require("./providers/aws");
 const webrisk_1 = require("./providers/webrisk");
+const openai_1 = require("./providers/openai");
 const url_blacklist_json_1 = __importDefault(require("./data/url-blacklist.json"));
 const url_shorteners_json_1 = __importDefault(require("./data/url-shorteners.json"));
 /**
@@ -21,6 +22,7 @@ class ModerationClient {
     googleSpeech;
     aws;
     webRisk;
+    openai;
     banList = [];
     urlBlackList = [];
     constructor(configuration) {
@@ -34,6 +36,9 @@ class ModerationClient {
         if (typeof configuration.google?.apiKey === 'string') {
            this.webRisk = new webrisk_1.WebRiskProvider(configuration.google.apiKey);
         }
+        if (typeof configuration.openai?.apiKey === 'string') {
+            this.openai = new openai_1.OpenAIModerationProvider(configuration.openai.apiKey);
+        }
         if (Array.isArray(configuration.banList)) {
             this.banList = configuration.banList;
         }
@@ -41,7 +46,7 @@ class ModerationClient {
            this.urlBlackList = configuration.urlBlackList;
        }
    }
-    async moderateText(text, minimumConfidence = 50) {
+    async moderateText(text, minimumConfidence = 50, options = {}) {
        const categories = [];
        const normalized = text.toLowerCase();
        const matches = this.banList.filter(w => normalized.includes(w));
@@ -49,11 +54,16 @@ class ModerationClient {
        const words = normalized.split(' ');
        categories.push({ category: 'BAN_LIST', confidence: (matches.length / words.length) * 100 });
        }
-
-
+        const provider = options.provider ?? 'google';
+        if ((provider === 'google' || provider === 'all') && this.googleLanguage) {
+            const google = await this.googleLanguage.moderateText(text, minimumConfidence);
+            categories.push(...google);
+        }
+        if ((provider === 'openai' || provider === 'all') && this.openai) {
+            const openai = await this.openai.moderateText(text, minimumConfidence);
+            categories.push(...openai);
         }
-
-        return { source: text, moderation: [...categories, ...googleCategories] };
+        return { source: text, moderation: categories };
     }
     async moderateImage(url, minimumConfidence = 50) {
         if (!this.aws) {
package/dist/providers/openai.d.ts
ADDED
@@ -0,0 +1,15 @@
+import type { ModerationCategory } from '../types';
+/**
+ * OpenAI Moderation API adapter.
+ *
+ * Uses the free `omni-moderation-latest` endpoint (no token cost,
+ * no SDK). Returns `ModerationCategory[]` in the same shape the
+ * Google and AWS providers return, so downstream callers can treat
+ * every provider uniformly.
+ */
+export declare class OpenAIModerationProvider {
+    private apiKey;
+    private model;
+    constructor(apiKey: string, model?: string);
+    moderateText(text: string, minimumConfidence?: number): Promise<ModerationCategory[]>;
+}
package/dist/providers/openai.js
ADDED
@@ -0,0 +1,54 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.OpenAIModerationProvider = void 0;
+/**
+ * OpenAI Moderation API adapter.
+ *
+ * Uses the free `omni-moderation-latest` endpoint (no token cost,
+ * no SDK). Returns `ModerationCategory[]` in the same shape the
+ * Google and AWS providers return, so downstream callers can treat
+ * every provider uniformly.
+ */
+class OpenAIModerationProvider {
+    apiKey;
+    model;
+    constructor(apiKey, model = 'omni-moderation-latest') {
+        this.apiKey = apiKey;
+        this.model = model;
+    }
+    async moderateText(text, minimumConfidence = 0) {
+        if (!this.apiKey) {
+            return [];
+        }
+        let response;
+        try {
+            response = await fetch('https://api.openai.com/v1/moderations', {
+                method: 'POST',
+                headers: {
+                    'Authorization': `Bearer ${this.apiKey}`,
+                    'Content-Type': 'application/json',
+                },
+                body: JSON.stringify({ model: this.model, input: text }),
+            });
+        }
+        catch {
+            return [];
+        }
+        if (!response.ok) {
+            return [];
+        }
+        const data = (await response.json());
+        const result = data.results?.[0];
+        if (!result) {
+            return [];
+        }
+        return Object.entries(result.categories)
+            .filter(([, flagged]) => flagged)
+            .map(([category]) => ({
+            category,
+            confidence: (result.category_scores[category] ?? 0) * 100,
+        }))
+            .filter(c => c.confidence >= minimumConfidence);
+    }
+}
+exports.OpenAIModerationProvider = OpenAIModerationProvider;
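The provider is also usable standalone via the `./providers/openai` subpath export added later in this diff. Note the fail-open contract visible above: a missing key, a network error, or a non-2xx response all yield `[]` rather than throwing. A sketch (output values illustrative):

    import { OpenAIModerationProvider } from '@joliegg/moderation/providers/openai';

    const provider = new OpenAIModerationProvider(process.env.OPENAI_API_KEY ?? '');

    // Only categories the API flagged AND whose score (scaled from 0-1 to
    // 0-100) clears the threshold come back.
    const categories = await provider.moderateText('free nitro, click here', 70);
    // e.g. [{ category: 'illicit', confidence: 84.3 }]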
package/dist/raid/detector.js
CHANGED
@@ -60,10 +60,12 @@ class RaidDetector {
      */
     async tryEnable(guildId) {
         const state = this.getState(guildId);
-        if (state.raidActive)
+        if (state.raidActive) {
             return 'already_active';
-        if (this.enabling.has(guildId))
+        }
+        if (this.enabling.has(guildId)) {
             return 'already_enabling';
+        }
         this.enabling.add(guildId);
         try {
             state.raidActive = true;
package/dist/rubrics/defaults.d.ts
ADDED
@@ -0,0 +1,19 @@
+import { ScoringRubric } from './rubric';
+/**
+ * Aggressive defaults. Flag anything moderately suspicious, delete at
+ * 75%, timeout above 90%. Good for tight-knit communities with low
+ * tolerance for borderline content.
+ */
+export declare const STRICT_RUBRIC: ScoringRubric;
+/**
+ * Conservative defaults. Only act on very high confidence. Good for
+ * large communities where false positives are worse than false
+ * negatives.
+ */
+export declare const PERMISSIVE_RUBRIC: ScoringRubric;
+/**
+ * NSFW-only rubric. Ignores harassment, hate, violence — only acts on
+ * sexual content. Useful when text moderation is handled out-of-band
+ * but image / link moderation still needs safety filtering.
+ */
+export declare const NSFW_ONLY_RUBRIC: ScoringRubric;
package/dist/rubrics/defaults.js
ADDED
@@ -0,0 +1,32 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.NSFW_ONLY_RUBRIC = exports.PERMISSIVE_RUBRIC = exports.STRICT_RUBRIC = void 0;
+const actions_1 = require("../actions");
+const rubric_1 = require("./rubric");
+/**
+ * Aggressive defaults. Flag anything moderately suspicious, delete at
+ * 75%, timeout above 90%. Good for tight-knit communities with low
+ * tolerance for borderline content.
+ */
+exports.STRICT_RUBRIC = new rubric_1.ScoringRubric([
+    { match: { minConfidence: 50 }, action: actions_1.ACTION_TYPES.WARN, severity: 'low' },
+    { match: { minConfidence: 75 }, action: actions_1.ACTION_TYPES.DELETE, severity: 'medium' },
+    { match: { minConfidence: 90 }, action: actions_1.ACTION_TYPES.TIMEOUT, severity: 'high' },
+]);
+/**
+ * Conservative defaults. Only act on very high confidence. Good for
+ * large communities where false positives are worse than false
+ * negatives.
+ */
+exports.PERMISSIVE_RUBRIC = new rubric_1.ScoringRubric([
+    { match: { minConfidence: 95 }, action: actions_1.ACTION_TYPES.DELETE, severity: 'high' },
+    { match: { minConfidence: 99 }, action: actions_1.ACTION_TYPES.TIMEOUT, severity: 'critical' },
+]);
+/**
+ * NSFW-only rubric. Ignores harassment, hate, violence — only acts on
+ * sexual content. Useful when text moderation is handled out-of-band
+ * but image / link moderation still needs safety filtering.
+ */
+exports.NSFW_ONLY_RUBRIC = new rubric_1.ScoringRubric([
+    { match: { category: 'sexual', minConfidence: 70 }, action: actions_1.ACTION_TYPES.DELETE, severity: 'high' },
+]);
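Feeding classifier output through one of these defaults (a sketch; the input categories are illustrative):

    import { STRICT_RUBRIC } from '@joliegg/moderation/rubrics';

    const verdict = STRICT_RUBRIC.evaluate([
      { category: 'harassment', confidence: 82 },
      { category: 'spam', confidence: 55 },
    ]);

    // harassment@82 fires the 50% (warn/low) and 75% (delete/medium) rules;
    // spam@55 fires only the 50% rule. Highest severity wins, so
    // verdict.action === ACTION_TYPES.DELETE, verdict.severity === 'medium',
    // and verdict.reasons lists all three rule hits.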
package/dist/rubrics/index.js
ADDED
@@ -0,0 +1,19 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+__exportStar(require("./types"), exports);
+__exportStar(require("./rubric"), exports);
+__exportStar(require("./defaults"), exports);
package/dist/rubrics/rubric.d.ts
ADDED
@@ -0,0 +1,21 @@
+import type { ModerationCategory } from '../types';
+import type { RubricResult, RubricRule } from './types';
+/**
+ * Composable classifier-to-action mapping.
+ *
+ * Callers hand a `ScoringRubric` their classifier output
+ * (`ModerationCategory[]`) and get back a decision: the recommended
+ * action, the aggregate severity, and the categories that contributed.
+ *
+ * Apps can use the ship-with defaults (`STRICT_RUBRIC`,
+ * `PERMISSIVE_RUBRIC`, `NSFW_ONLY_RUBRIC`) or pass custom rules.
+ *
+ * Rules are evaluated in input order; the highest-severity match wins
+ * the recommended action. Every matched category is returned in the
+ * result so callers can explain the decision.
+ */
+export declare class ScoringRubric {
+    private rules;
+    constructor(rules: RubricRule[]);
+    evaluate(categories: ModerationCategory[]): RubricResult;
+}
package/dist/rubrics/rubric.js
ADDED
@@ -0,0 +1,57 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ScoringRubric = void 0;
+const SEVERITY_ORDER = {
+    low: 0,
+    medium: 1,
+    high: 2,
+    critical: 3,
+};
+/**
+ * Composable classifier-to-action mapping.
+ *
+ * Callers hand a `ScoringRubric` their classifier output
+ * (`ModerationCategory[]`) and get back a decision: the recommended
+ * action, the aggregate severity, and the categories that contributed.
+ *
+ * Apps can use the ship-with defaults (`STRICT_RUBRIC`,
+ * `PERMISSIVE_RUBRIC`, `NSFW_ONLY_RUBRIC`) or pass custom rules.
+ *
+ * Rules are evaluated in input order; the highest-severity match wins
+ * the recommended action. Every matched category is returned in the
+ * result so callers can explain the decision.
+ */
+class ScoringRubric {
+    rules;
+    constructor(rules) {
+        this.rules = rules;
+    }
+    evaluate(categories) {
+        const matched = [];
+        const reasons = [];
+        let best = null;
+        for (const category of categories) {
+            for (const rule of this.rules) {
+                const categoryMatches = rule.match.category === undefined || category.category.includes(rule.match.category);
+                if (!categoryMatches) {
+                    continue;
+                }
+                if (category.confidence < rule.match.minConfidence) {
+                    continue;
+                }
+                matched.push(category);
+                reasons.push(`${category.category} (${category.confidence.toFixed(0)}%) >= ${rule.match.minConfidence}% → ${rule.action}`);
+                if (!best || SEVERITY_ORDER[rule.severity] > SEVERITY_ORDER[best.severity]) {
+                    best = rule;
+                }
+            }
+        }
+        return {
+            action: best?.action ?? null,
+            severity: best?.severity ?? 'low',
+            matched,
+            reasons,
+        };
+    }
+}
+exports.ScoringRubric = ScoringRubric;
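Custom rubrics use the same `RubricRule` shape (see types.d.ts just below). A sketch of the substring category match and the severity-wins ordering; the `ACTION_TYPES` import path is an assumption:

    import { ScoringRubric } from '@joliegg/moderation/rubrics';
    import { ACTION_TYPES } from '@joliegg/moderation'; // assumed re-export

    const rubric = new ScoringRubric([
      { match: { category: 'sexual', minConfidence: 60 }, action: ACTION_TYPES.DELETE, severity: 'high' },
      { match: { minConfidence: 95 }, action: ACTION_TYPES.WARN, severity: 'low' }, // wildcard rule
    ]);

    // 'sexual/minors' matches the 'sexual' rule via substring includes().
    const verdict = rubric.evaluate([{ category: 'sexual/minors', confidence: 65 }]);
    // verdict.action === ACTION_TYPES.DELETE, verdict.severity === 'high'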
package/dist/rubrics/types.d.ts
ADDED
@@ -0,0 +1,27 @@
+import type { ModerationCategory, Severity } from '../types';
+import type { ActionType } from '../actions';
+export interface RubricMatch {
+    /**
+     * Category name to match. Omit for a wildcard that fires on any category.
+     * Matched with substring `includes`, so a rule for `sexual` also fires
+     * on compound categories like `sexual/minors`.
+     */
+    category?: string;
+    /** Minimum confidence (0-100) required for this rule to fire. */
+    minConfidence: number;
+}
+export interface RubricRule {
+    match: RubricMatch;
+    action: ActionType;
+    severity: Severity;
+}
+export interface RubricResult {
+    /** The action the highest-severity matched rule recommends. */
+    action: ActionType | null;
+    /** Aggregate severity — matches the winning rule, or `'low'` if nothing matched. */
+    severity: Severity;
+    /** Every category that matched at least one rule. */
+    matched: ModerationCategory[];
+    /** Human-readable explanations — one per matched rule. */
+    reasons: string[];
+}
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@joliegg/moderation",
-  "version": "0.8.0",
+  "version": "0.9.0",
   "description": "A set of tools for chat moderation",
   "author": "Diana Islas Ocampo",
   "main": "dist/index.js",
@@ -31,6 +31,14 @@
       "types": "./dist/raid/index.d.ts",
       "default": "./dist/raid/index.js"
     },
+    "./rubrics": {
+      "types": "./dist/rubrics/index.d.ts",
+      "default": "./dist/rubrics/index.js"
+    },
+    "./audit": {
+      "types": "./dist/audit/index.d.ts",
+      "default": "./dist/audit/index.js"
+    },
     "./providers/google": {
       "types": "./dist/providers/google.d.ts",
       "default": "./dist/providers/google.js"
@@ -43,6 +51,10 @@
       "types": "./dist/providers/webrisk.d.ts",
      "default": "./dist/providers/webrisk.js"
     },
+    "./providers/openai": {
+      "types": "./dist/providers/openai.d.ts",
+      "default": "./dist/providers/openai.js"
+    },
     "./package.json": "./package.json"
   },
   "scripts": {
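With the three new entries in the exports map, these subpath imports now resolve:

    import { AuditTrailEmitter } from '@joliegg/moderation/audit';
    import { ScoringRubric, STRICT_RUBRIC } from '@joliegg/moderation/rubrics';
    import { OpenAIModerationProvider } from '@joliegg/moderation/providers/openai';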
package/src/audit/emitter.ts
ADDED
@@ -0,0 +1,77 @@
+import type { AuditEvent, AuditEventKind, AuditEventMap } from './events';
+
+/**
+ * Subscriber handler type for a specific audit event kind. Sync or
+ * async; the emitter fire-and-forgets async handlers so slow
+ * persistence sinks don't block the enforcement path.
+ */
+export type AuditHandler<K extends AuditEventKind> = (event: AuditEventMap[K]) => void | Promise<void>;
+
+/**
+ * Typed event emitter for moderation audit events.
+ *
+ * Subscribers register for specific event kinds with full type safety.
+ * Handlers that throw are isolated — a broken subscriber never blocks
+ * other subscribers or the emitter itself.
+ *
+ * Intentionally NOT `extends EventEmitter` from `node:events`: the
+ * typing story for Node's built-in emitter is painful, and the feature
+ * set we need (on / off / emit) is small enough to implement directly.
+ */
+export class AuditTrailEmitter {
+  private handlers = new Map<AuditEventKind, Set<AuditHandler<AuditEventKind>>>();
+
+  on<K extends AuditEventKind>(kind: K, handler: AuditHandler<K>): this {
+    if (!this.handlers.has(kind)) {
+      this.handlers.set(kind, new Set());
+    }
+
+    this.handlers.get(kind)!.add(handler as AuditHandler<AuditEventKind>);
+
+    return this;
+  }
+
+  off<K extends AuditEventKind>(kind: K, handler: AuditHandler<K>): this {
+    this.handlers.get(kind)?.delete(handler as AuditHandler<AuditEventKind>);
+    return this;
+  }
+
+  emit(event: AuditEvent): void {
+    const handlers = this.handlers.get(event.kind);
+
+    if (!handlers) {
+      return;
+    }
+
+    for (const handler of handlers) {
+      try {
+        // Fire-and-forget. Async handlers are not awaited so the
+        // emitter's caller isn't blocked on slow persistence sinks.
+        // Subscribers own their own error handling for async work.
+        const result = handler(event);
+
+        if (result instanceof Promise) {
+          result.catch(err => {
+            console.error(`[Jolie::Moderation::AuditTrail] async handler error for ${event.kind}:`, err);
+          });
+        }
+      } catch (err) {
+        console.error(`[Jolie::Moderation::AuditTrail] handler error for ${event.kind}:`, err);
+      }
+    }
+  }
+
+  listenerCount(kind: AuditEventKind): number {
+    return this.handlers.get(kind)?.size ?? 0;
+  }
+
+  removeAllListeners(kind?: AuditEventKind): this {
+    if (kind) {
+      this.handlers.delete(kind);
+    } else {
+      this.handlers.clear();
+    }
+
+    return this;
+  }
+}
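The `as AuditHandler<AuditEventKind>` casts are the usual workaround for storing heterogeneous handlers in one `Map`: the stored value type is widened, and safety is re-established at the public API because `on`, `off`, and `emit` only ever pair a handler with its declared kind. The bookkeeping methods chain as expected; a quick sketch:

    import { AuditTrailEmitter } from '@joliegg/moderation/audit';

    const emitter = new AuditTrailEmitter();
    const noop = () => {};

    emitter.on('raid_detected', noop);
    emitter.listenerCount('raid_detected'); // 1

    emitter.off('raid_detected', noop);
    emitter.listenerCount('raid_detected'); // 0

    emitter.on('action', noop).on('spam_detected', noop); // chainable
    emitter.removeAllListeners();                         // no argument: clears every kind
    emitter.listenerCount('action');                      // 0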
package/src/audit/events.ts
ADDED
@@ -0,0 +1,89 @@
+import type { ActionType } from '../actions';
+import type { SpamMessageRef, SpamReason } from '../spam/cache';
+
+export type Platform = 'discord' | 'twitch';
+
+/** Base fields shared by every audit event. */
+interface AuditEventBase {
+  guildId: string;
+  platform: Platform;
+  timestamp: Date;
+}
+
+/**
+ * A moderator (human or system) performed an enforcement action.
+ * This is the primary audit log entry.
+ */
+export interface ActionEvent extends AuditEventBase {
+  kind: 'action';
+  actionType: ActionType;
+  targetId: string;
+  targetName: string;
+  moderatorId: string;
+  moderatorName: string;
+  reason: string;
+  details?: string;
+  duration?: number;
+  channelId?: string;
+  messageId?: string;
+  evidence?: string[];
+}
+
+/**
+ * Automatic spam detection fired. Independent of whether enforcement
+ * succeeded — the detection itself is the audit event.
+ */
+export interface SpamDetectedEvent extends AuditEventBase {
+  kind: 'spam_detected';
+  userId: string;
+  reason: SpamReason;
+  details?: string;
+  priorMessageIds: SpamMessageRef[];
+  channelId?: string;
+}
+
+/** Raid mode detection crossed the join-count threshold. */
+export interface RaidDetectedEvent extends AuditEventBase {
+  kind: 'raid_detected';
+  joinCount: number;
+  windowSeconds: number;
+  autoTriggered: boolean;
+}
+
+/** A message was blocked by the per-user permissions check. */
+export interface PermissionBlockEvent extends AuditEventBase {
+  kind: 'permission_block';
+  userId: string;
+  violations: string[];
+  channelId?: string;
+  messageId?: string;
+}
+
+/** An appeal was reviewed (approved or denied). */
+export interface AppealReviewedEvent extends AuditEventBase {
+  kind: 'appeal_reviewed';
+  appealId: string;
+  userId: string;
+  reviewerId: string;
+  approved: boolean;
+  note?: string;
+}
+
+/** Union of every audit event shape. Add new kinds here. */
+export type AuditEvent =
+  | ActionEvent
+  | SpamDetectedEvent
+  | RaidDetectedEvent
+  | PermissionBlockEvent
+  | AppealReviewedEvent;
+
+/** Map from event kind → concrete event type. Used by the emitter. */
+export interface AuditEventMap {
+  action: ActionEvent;
+  spam_detected: SpamDetectedEvent;
+  raid_detected: RaidDetectedEvent;
+  permission_block: PermissionBlockEvent;
+  appeal_reviewed: AppealReviewedEvent;
+}
+
+export type AuditEventKind = keyof AuditEventMap;
package/src/client.ts
CHANGED
@@ -5,6 +5,7 @@ import { SpeechClient } from '@google-cloud/speech';
 import { GoogleLanguageProvider, GoogleSpeechProvider, fetchAudio } from './providers/google';
 import { RekognitionProvider } from './providers/aws';
 import { WebRiskProvider } from './providers/webrisk';
+import { OpenAIModerationProvider } from './providers/openai';
 import URLBlackList from './data/url-blacklist.json';
 import URLShortenerList from './data/url-shorteners.json';
 
@@ -19,6 +20,7 @@ export class ModerationClient {
   private googleSpeech?: GoogleSpeechProvider;
   private aws?: RekognitionProvider;
   private webRisk?: WebRiskProvider;
+  private openai?: OpenAIModerationProvider;
   private banList: string[] = [];
   private urlBlackList: string[] = [];
 
@@ -41,6 +43,10 @@ export class ModerationClient {
       this.webRisk = new WebRiskProvider(configuration.google.apiKey);
     }
 
+    if (typeof configuration.openai?.apiKey === 'string') {
+      this.openai = new OpenAIModerationProvider(configuration.openai.apiKey);
+    }
+
     if (Array.isArray(configuration.banList)) {
       this.banList = configuration.banList;
     }
@@ -50,7 +56,11 @@ export class ModerationClient {
     }
   }
 
-  async moderateText(text: string, minimumConfidence: number = 50): Promise<ModerationResult> {
+  async moderateText(
+    text: string,
+    minimumConfidence: number = 50,
+    options: { provider?: 'google' | 'openai' | 'all' } = {},
+  ): Promise<ModerationResult> {
     const categories: ModerationCategory[] = [];
     const normalized = text.toLowerCase();
     const matches = this.banList.filter(w => normalized.includes(w));
@@ -60,13 +70,19 @@ export class ModerationClient {
       categories.push({ category: 'BAN_LIST', confidence: (matches.length / words.length) * 100 });
     }
 
-
-
+    const provider = options.provider ?? 'google';
+
+    if ((provider === 'google' || provider === 'all') && this.googleLanguage) {
+      const google = await this.googleLanguage.moderateText(text, minimumConfidence);
+      categories.push(...google);
     }
 
-
+    if ((provider === 'openai' || provider === 'all') && this.openai) {
+      const openai = await this.openai.moderateText(text, minimumConfidence);
+      categories.push(...openai);
+    }
 
-    return { source: text, moderation: [...categories, ...googleCategories] };
+    return { source: text, moderation: categories };
   }
 
   async moderateImage(url: string, minimumConfidence: number = 50): Promise<ModerationResult> {
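One caveat with `provider: 'all'`: Google and OpenAI use different category taxonomies, and the result is a plain concatenation, so the same concern can appear twice under different names. A hypothetical reduction (not part of the package) that keeps the highest confidence per category name:

    // Structural shape per this package's ModerationCategory usage.
    type Category = { category: string; confidence: number };

    function dedupe(categories: Category[]): Category[] {
      const best = new Map<string, Category>();
      for (const c of categories) {
        const prev = best.get(c.category);
        if (!prev || c.confidence > prev.confidence) {
          best.set(c.category, c);
        }
      }
      return [...best.values()];
    }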
package/src/providers/openai.ts
ADDED
@@ -0,0 +1,64 @@
+import type { ModerationCategory } from '../types';
+
+interface OpenAIModerationResponse {
+  results?: Array<{
+    flagged: boolean;
+    categories: Record<string, boolean>;
+    category_scores: Record<string, number>;
+  }>;
+}
+
+/**
+ * OpenAI Moderation API adapter.
+ *
+ * Uses the free `omni-moderation-latest` endpoint (no token cost,
+ * no SDK). Returns `ModerationCategory[]` in the same shape the
+ * Google and AWS providers return, so downstream callers can treat
+ * every provider uniformly.
+ */
+export class OpenAIModerationProvider {
+  constructor(
+    private apiKey: string,
+    private model: string = 'omni-moderation-latest',
+  ) {}
+
+  async moderateText(text: string, minimumConfidence: number = 0): Promise<ModerationCategory[]> {
+    if (!this.apiKey) {
+      return [];
+    }
+
+    let response: Response;
+
+    try {
+      response = await fetch('https://api.openai.com/v1/moderations', {
+        method: 'POST',
+        headers: {
+          'Authorization': `Bearer ${this.apiKey}`,
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({ model: this.model, input: text }),
+      });
+    } catch {
+      return [];
+    }
+
+    if (!response.ok) {
+      return [];
+    }
+
+    const data = (await response.json()) as OpenAIModerationResponse;
+    const result = data.results?.[0];
+
+    if (!result) {
+      return [];
+    }
+
+    return Object.entries(result.categories)
+      .filter(([, flagged]) => flagged)
+      .map(([category]) => ({
+        category,
+        confidence: (result.category_scores[category] ?? 0) * 100,
+      }))
+      .filter(c => c.confidence >= minimumConfidence);
+  }
+}
package/src/raid/detector.ts
CHANGED
@@ -59,6 +59,7 @@ export class RaidDetector {
     if (!this.state.has(guildId)) {
       this.state.set(guildId, { joins: [], raidActive: false });
     }
+
     return this.state.get(guildId)!;
   }
 
@@ -74,8 +75,10 @@ export class RaidDetector {
   track(guildId: string, member: MemberJoin): RaidTrackResult {
     const now = Date.now();
     const state = this.getState(guildId);
+
    this.cleanupJoins(state, now);
     state.joins.push({ memberId: member.memberId, timestamp: now });
+
     return {
       isRaid: state.joins.length >= this.joinThreshold,
       joinCount: state.joins.length,
@@ -95,9 +98,17 @@ export class RaidDetector {
    */
   async tryEnable(guildId: string): Promise<EnableResult> {
     const state = this.getState(guildId);
-    if (state.raidActive) return 'already_active';
-    if (this.enabling.has(guildId)) return 'already_enabling';
+
+    if (state.raidActive) {
+      return 'already_active';
+    }
+
+    if (this.enabling.has(guildId)) {
+      return 'already_enabling';
+    }
+
     this.enabling.add(guildId);
+
     try {
       state.raidActive = true;
       return 'enabled';
package/src/rubrics/defaults.ts
ADDED
@@ -0,0 +1,32 @@
+import { ACTION_TYPES } from '../actions';
+import { ScoringRubric } from './rubric';
+
+/**
+ * Aggressive defaults. Flag anything moderately suspicious, delete at
+ * 75%, timeout above 90%. Good for tight-knit communities with low
+ * tolerance for borderline content.
+ */
+export const STRICT_RUBRIC = new ScoringRubric([
+  { match: { minConfidence: 50 }, action: ACTION_TYPES.WARN, severity: 'low' },
+  { match: { minConfidence: 75 }, action: ACTION_TYPES.DELETE, severity: 'medium' },
+  { match: { minConfidence: 90 }, action: ACTION_TYPES.TIMEOUT, severity: 'high' },
+]);
+
+/**
+ * Conservative defaults. Only act on very high confidence. Good for
+ * large communities where false positives are worse than false
+ * negatives.
+ */
+export const PERMISSIVE_RUBRIC = new ScoringRubric([
+  { match: { minConfidence: 95 }, action: ACTION_TYPES.DELETE, severity: 'high' },
+  { match: { minConfidence: 99 }, action: ACTION_TYPES.TIMEOUT, severity: 'critical' },
+]);
+
+/**
+ * NSFW-only rubric. Ignores harassment, hate, violence — only acts on
+ * sexual content. Useful when text moderation is handled out-of-band
+ * but image / link moderation still needs safety filtering.
+ */
+export const NSFW_ONLY_RUBRIC = new ScoringRubric([
+  { match: { category: 'sexual', minConfidence: 70 }, action: ACTION_TYPES.DELETE, severity: 'high' },
+]);
package/src/rubrics/rubric.ts
ADDED
@@ -0,0 +1,62 @@
+import type { ModerationCategory, Severity } from '../types';
+import type { RubricResult, RubricRule } from './types';
+
+const SEVERITY_ORDER: Record<Severity, number> = {
+  low: 0,
+  medium: 1,
+  high: 2,
+  critical: 3,
+};
+
+/**
+ * Composable classifier-to-action mapping.
+ *
+ * Callers hand a `ScoringRubric` their classifier output
+ * (`ModerationCategory[]`) and get back a decision: the recommended
+ * action, the aggregate severity, and the categories that contributed.
+ *
+ * Apps can use the ship-with defaults (`STRICT_RUBRIC`,
+ * `PERMISSIVE_RUBRIC`, `NSFW_ONLY_RUBRIC`) or pass custom rules.
+ *
+ * Rules are evaluated in input order; the highest-severity match wins
+ * the recommended action. Every matched category is returned in the
+ * result so callers can explain the decision.
+ */
+export class ScoringRubric {
+  constructor(private rules: RubricRule[]) {}
+
+  evaluate(categories: ModerationCategory[]): RubricResult {
+    const matched: ModerationCategory[] = [];
+    const reasons: string[] = [];
+
+    let best: RubricRule | null = null;
+
+    for (const category of categories) {
+      for (const rule of this.rules) {
+        const categoryMatches = rule.match.category === undefined || category.category.includes(rule.match.category);
+
+        if (!categoryMatches) {
+          continue;
+        }
+
+        if (category.confidence < rule.match.minConfidence) {
+          continue;
+        }
+
+        matched.push(category);
+        reasons.push(`${category.category} (${category.confidence.toFixed(0)}%) >= ${rule.match.minConfidence}% → ${rule.action}`);
+
+        if (!best || SEVERITY_ORDER[rule.severity] > SEVERITY_ORDER[best.severity]) {
+          best = rule;
+        }
+      }
+    }
+
+    return {
+      action: best?.action ?? null,
+      severity: best?.severity ?? 'low',
+      matched,
+      reasons,
+    };
+  }
+}
package/src/rubrics/types.ts
ADDED
@@ -0,0 +1,30 @@
+import type { ModerationCategory, Severity } from '../types';
+import type { ActionType } from '../actions';
+
+export interface RubricMatch {
+  /**
+   * Category name to match. Omit for a wildcard that fires on any category.
+   * Matched with substring `includes`, so a rule for `sexual` also fires
+   * on compound categories like `sexual/minors`.
+   */
+  category?: string;
+  /** Minimum confidence (0-100) required for this rule to fire. */
+  minConfidence: number;
+}
+
+export interface RubricRule {
+  match: RubricMatch;
+  action: ActionType;
+  severity: Severity;
+}
+
+export interface RubricResult {
+  /** The action the highest-severity matched rule recommends. */
+  action: ActionType | null;
+  /** Aggregate severity — matches the winning rule, or `'low'` if nothing matched. */
+  severity: Severity;
+  /** Every category that matched at least one rule. */
+  matched: ModerationCategory[];
+  /** Human-readable explanations — one per matched rule. */
+  reasons: string[];
+}
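Taken together, the 0.9.0 additions support a pipeline like the following sketch, reusing the `client`, `audit`, and `STRICT_RUBRIC` values from the earlier examples (IDs and platform are placeholders):

    async function handleMessage(text: string, guildId: string, userId: string) {
      const { moderation } = await client.moderateText(text, 0, { provider: 'all' });
      const verdict = STRICT_RUBRIC.evaluate(moderation);

      if (verdict.action !== null) {
        audit.emit({
          kind: 'action',
          guildId,
          platform: 'discord',
          timestamp: new Date(),
          actionType: verdict.action,
          targetId: userId,
          targetName: 'unknown',
          moderatorId: 'system',
          moderatorName: 'AutoMod',
          reason: verdict.reasons.join('; '),
        });
      }
    }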