@shankarkharel/profanity-core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +79 -0
- package/dist/index.d.ts +79 -0
- package/dist/index.js +227 -0
- package/dist/index.mjs +194 -0
- package/package.json +28 -0
package/dist/index.d.mts
ADDED
@@ -0,0 +1,79 @@
+type Severity = 1 | 2 | 3 | 4 | 5;
+type MatchKind = "word" | "phrase";
+interface TermEntry {
+    term: string;
+    severity: Severity;
+    category?: string[];
+    match?: MatchKind;
+    variants?: string[];
+}
+interface NormalizerStep {
+    name: string;
+    run: (input: string) => string;
+}
+interface LanguagePack {
+    code: string;
+    version: string;
+    terms: TermEntry[];
+    allowlist?: string[];
+    normalizers?: NormalizerStep[];
+}
+interface MatchDetail {
+    pack: string;
+    term: string;
+    severity: Severity;
+    category: string[];
+    index: number;
+}
+interface AnalyzeResult {
+    profane: boolean;
+    score: number;
+    maxSeverity: Severity | 0;
+    matches: MatchDetail[];
+}
+interface EngineOptions {
+    severityThreshold?: Severity;
+    enabledLanguages?: string[];
+    extraTerms?: TermEntry[];
+    extraAllowlist?: string[];
+    enableRepeatCollapse?: boolean;
+    maxTextLength?: number;
+}
+interface CensorOptions {
+    censorChar?: string;
+    preserveFirstLast?: boolean;
+    replaceWith?: string;
+}
+interface CensorOptions {
+    censorChar?: string;
+    replaceWith?: string;
+    preserveFirstLast?: boolean;
+    preservePrefix?: number;
+    preserveSuffix?: number;
+}
+
+declare class ProfanityEngine {
+    private packs;
+    private opts;
+    constructor(packs: LanguagePack[], options?: EngineOptions);
+    analyze(input: string): AnalyzeResult;
+    isProfane(text: string): boolean;
+    censor(text: string, options?: {
+        censorChar?: string;
+        replaceWith?: string;
+        preserveFirstLast?: boolean;
+        preservePrefix?: number;
+        preserveSuffix?: number;
+    }): string;
+    private expandTerms;
+    private normalizeForPack;
+}
+
+declare const nfkc: NormalizerStep;
+declare const lower: NormalizerStep;
+declare const collapseWhitespace: NormalizerStep;
+declare const stripPunctuation: NormalizerStep;
+declare function collapseRepeats(maxRepeats?: number): NormalizerStep;
+declare const leetspeak: NormalizerStep;
+
+export { type AnalyzeResult, type CensorOptions, type EngineOptions, type LanguagePack, type MatchDetail, type MatchKind, type NormalizerStep, ProfanityEngine, type Severity, type TermEntry, collapseRepeats, collapseWhitespace, leetspeak, lower, nfkc, stripPunctuation };
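
To make the declared surface concrete, here is a minimal usage sketch against these typings. The "en" pack and its terms are hypothetical placeholders; this diff ships no language pack data.

import { ProfanityEngine, type LanguagePack } from "@shankarkharel/profanity-core";

// Hypothetical pack: nothing in dist/ bundles term data.
const en: LanguagePack = {
  code: "en",
  version: "1.0.0",
  terms: [
    { term: "badword", severity: 3, variants: ["b4dword"] },
    { term: "really bad phrase", severity: 5, match: "phrase" }
  ]
};

const engine = new ProfanityEngine([en], { severityThreshold: 2 });
engine.isProfane("that b4dword again");                     // true, via the declared variant
engine.censor("badword here", { preserveFirstLast: true }); // "b*****d here"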
package/dist/index.d.ts
ADDED
@@ -0,0 +1,79 @@
+type Severity = 1 | 2 | 3 | 4 | 5;
+type MatchKind = "word" | "phrase";
+interface TermEntry {
+    term: string;
+    severity: Severity;
+    category?: string[];
+    match?: MatchKind;
+    variants?: string[];
+}
+interface NormalizerStep {
+    name: string;
+    run: (input: string) => string;
+}
+interface LanguagePack {
+    code: string;
+    version: string;
+    terms: TermEntry[];
+    allowlist?: string[];
+    normalizers?: NormalizerStep[];
+}
+interface MatchDetail {
+    pack: string;
+    term: string;
+    severity: Severity;
+    category: string[];
+    index: number;
+}
+interface AnalyzeResult {
+    profane: boolean;
+    score: number;
+    maxSeverity: Severity | 0;
+    matches: MatchDetail[];
+}
+interface EngineOptions {
+    severityThreshold?: Severity;
+    enabledLanguages?: string[];
+    extraTerms?: TermEntry[];
+    extraAllowlist?: string[];
+    enableRepeatCollapse?: boolean;
+    maxTextLength?: number;
+}
+interface CensorOptions {
+    censorChar?: string;
+    preserveFirstLast?: boolean;
+    replaceWith?: string;
+}
+interface CensorOptions {
+    censorChar?: string;
+    replaceWith?: string;
+    preserveFirstLast?: boolean;
+    preservePrefix?: number;
+    preserveSuffix?: number;
+}
+
+declare class ProfanityEngine {
+    private packs;
+    private opts;
+    constructor(packs: LanguagePack[], options?: EngineOptions);
+    analyze(input: string): AnalyzeResult;
+    isProfane(text: string): boolean;
+    censor(text: string, options?: {
+        censorChar?: string;
+        replaceWith?: string;
+        preserveFirstLast?: boolean;
+        preservePrefix?: number;
+        preserveSuffix?: number;
+    }): string;
+    private expandTerms;
+    private normalizeForPack;
+}
+
+declare const nfkc: NormalizerStep;
+declare const lower: NormalizerStep;
+declare const collapseWhitespace: NormalizerStep;
+declare const stripPunctuation: NormalizerStep;
+declare function collapseRepeats(maxRepeats?: number): NormalizerStep;
+declare const leetspeak: NormalizerStep;
+
+export { type AnalyzeResult, type CensorOptions, type EngineOptions, type LanguagePack, type MatchDetail, type MatchKind, type NormalizerStep, ProfanityEngine, type Severity, type TermEntry, collapseRepeats, collapseWhitespace, leetspeak, lower, nfkc, stripPunctuation };
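
Note that CensorOptions is declared twice in these typings; TypeScript merges the two interface declarations, so the effective option set includes preservePrefix and preserveSuffix alongside censorChar, replaceWith, and preserveFirstLast. A short masking sketch, again with a hypothetical one-term pack (a non-"en" code sidesteps the leetspeak step):

import { ProfanityEngine, type LanguagePack } from "@shankarkharel/profanity-core";

const pack: LanguagePack = { code: "xx", version: "0.0.1", terms: [{ term: "badword", severity: 3 }] };
const engine = new ProfanityEngine([pack]);

engine.censor("BadWord", { censorChar: "#" });                      // "#######" (matching is case-insensitive)
engine.censor("badword", { preservePrefix: 3, preserveSuffix: 2 }); // "bad**rd"
engine.censor("badword", { replaceWith: "[removed]" });             // "[removed]"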
package/dist/index.js
ADDED
@@ -0,0 +1,227 @@
+"use strict";
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+// src/index.ts
+var index_exports = {};
+__export(index_exports, {
+  ProfanityEngine: () => ProfanityEngine,
+  collapseRepeats: () => collapseRepeats,
+  collapseWhitespace: () => collapseWhitespace,
+  leetspeak: () => leetspeak,
+  lower: () => lower,
+  nfkc: () => nfkc,
+  stripPunctuation: () => stripPunctuation
+});
+module.exports = __toCommonJS(index_exports);
+
+// src/tokenize.ts
+function tokenizeWords(text) {
+  const tokens = [];
+  let i = 0;
+  while (i < text.length) {
+    while (i < text.length && text[i] === " ") i++;
+    const start = i;
+    while (i < text.length && text[i] !== " ") i++;
+    const end = i;
+    if (end > start) {
+      tokens.push({ value: text.slice(start, end), start, end });
+    }
+  }
+  return tokens;
+}
+
+// src/normalizers.ts
+var nfkc = {
+  name: "nfkc",
+  run: (s) => s.normalize("NFKC")
+};
+var lower = {
+  name: "lower",
+  run: (s) => s.toLowerCase()
+};
+var collapseWhitespace = {
+  name: "collapseWhitespace",
+  run: (s) => s.replace(/\s+/g, " ").trim()
+};
+var stripPunctuation = {
+  name: "stripPunctuation",
+  run: (s) => s.replace(/[^\p{L}\p{N}\u0900-\u097F ]+/gu, " ")
+};
+function collapseRepeats(maxRepeats = 2) {
+  return {
+    name: "collapseRepeats",
+    run: (s) => s.replace(/(.)\1{2,}/g, (_m, ch) => String(ch).repeat(maxRepeats))
+  };
+}
+var leetspeak = {
+  name: "leetspeak",
+  run: (s) => s.replace(/[@]/g, "a").replace(/[!]/g, "i").replace(/[0]/g, "o").replace(/[1|]/g, "i").replace(/[$]/g, "s")
+};
+
+// src/engine.ts
+function uniq(arr) {
+  return Array.from(new Set(arr));
+}
+function clampText(text, max) {
+  if (text.length <= max) return text;
+  return text.slice(0, max);
+}
+var ProfanityEngine = class {
+  constructor(packs, options) {
+    this.packs = [];
+    this.packs = packs;
+    this.opts = {
+      severityThreshold: options?.severityThreshold ?? 1,
+      enabledLanguages: options?.enabledLanguages ?? [],
+      extraTerms: options?.extraTerms ?? [],
+      extraAllowlist: options?.extraAllowlist ?? [],
+      enableRepeatCollapse: options?.enableRepeatCollapse ?? true,
+      maxTextLength: options?.maxTextLength ?? 2e4
+    };
+  }
+  analyze(input) {
+    const text = clampText(input ?? "", this.opts.maxTextLength);
+    const enabled = this.opts.enabledLanguages.length ? this.packs.filter((p) => this.opts.enabledLanguages.includes(p.code)) : this.packs;
+    const matches = [];
+    for (const pack of enabled) {
+      const normalized = this.normalizeForPack(text, pack);
+      const allow = new Set(uniq([...pack.allowlist ?? [], ...this.opts.extraAllowlist]).map((x) => this.normalizeForPack(x, pack)));
+      const allTerms = [...pack.terms, ...this.opts.extraTerms];
+      const expanded = this.expandTerms(allTerms);
+      const tokens = tokenizeWords(normalized);
+      const wordIndex = /* @__PURE__ */ new Map();
+      for (const t of expanded.filter((x) => (x.match ?? "word") === "word")) {
+        const key = this.normalizeForPack(t.term, pack);
+        if (!wordIndex.has(key)) wordIndex.set(key, []);
+        wordIndex.get(key).push(t);
+      }
+      for (const tok of tokens) {
+        if (allow.has(tok.value)) continue;
+        const hit = wordIndex.get(tok.value);
+        if (hit) {
+          for (const h of hit) {
+            if (h.severity < this.opts.severityThreshold) continue;
+            matches.push({
+              pack: pack.code,
+              term: h.term,
+              severity: h.severity,
+              category: h.category ?? [],
+              index: tok.start
+            });
+          }
+        }
+      }
+      const phrases = expanded.filter((x) => (x.match ?? "word") === "phrase");
+      for (const ph of phrases) {
+        const needle = this.normalizeForPack(ph.term, pack);
+        if (!needle || allow.has(needle)) continue;
+        let idx = normalized.indexOf(needle);
+        while (idx !== -1) {
+          if (ph.severity >= this.opts.severityThreshold) {
+            matches.push({
+              pack: pack.code,
+              term: ph.term,
+              severity: ph.severity,
+              category: ph.category ?? [],
+              index: idx
+            });
+          }
+          idx = normalized.indexOf(needle, idx + needle.length);
+        }
+      }
+    }
+    const maxSeverity = matches.length ? matches.reduce((m, x) => x.severity > m ? x.severity : m, 0) : 0;
+    const score = matches.length ? Math.min(100, matches.length * 15 + maxSeverity * 10) : 0;
+    return {
+      profane: matches.length > 0,
+      score,
+      maxSeverity,
+      matches
+    };
+  }
+  isProfane(text) {
+    return this.analyze(text).profane;
+  }
+  censor(text, options) {
+    const result = this.analyze(text);
+    if (!result.profane) return text;
+    const censorChar = options?.censorChar ?? "*";
+    const replaceWith = options?.replaceWith;
+    const preservePrefix = options?.preservePrefix ?? (options?.preserveFirstLast ? 1 : 0);
+    const preserveSuffix = options?.preserveSuffix ?? (options?.preserveFirstLast ? 1 : 0);
+    const makeMasked = (term) => {
+      if (replaceWith) return replaceWith;
+      const len = term.length;
+      const pre = Math.max(0, Math.min(preservePrefix, len));
+      const suf = Math.max(0, Math.min(preserveSuffix, len - pre));
+      let mid = Math.max(0, len - pre - suf);
+      if (len >= 2 && mid === 0) {
+        const safePre = Math.max(0, Math.min(pre, len - 1));
+        return term.slice(0, safePre) + censorChar + term.slice(safePre + 1);
+      }
+      return term.slice(0, pre) + censorChar.repeat(mid) + term.slice(len - suf);
+    };
+    let out = text;
+    for (const m of result.matches) {
+      const term = m.term;
+      if (!term) continue;
+      const replacement = makeMasked(term);
+      const re = new RegExp(escapeRegExp(term), "gi");
+      out = out.replace(re, replacement);
+    }
+    return out;
+  }
+  expandTerms(terms) {
+    const out = [];
+    for (const t of terms) {
+      out.push(t);
+      for (const v of t.variants ?? []) {
+        out.push({ ...t, term: v });
+      }
+    }
+    return out;
+  }
+  normalizeForPack(text, pack) {
+    const steps = [
+      nfkc,
+      lower,
+      ...pack.code === "en" ? [leetspeak] : [],
+      ...this.opts.enableRepeatCollapse ? [collapseRepeats(2)] : [],
+      stripPunctuation,
+      collapseWhitespace,
+      ...pack.normalizers ?? []
+    ];
+    let s = text;
+    for (const step of steps) s = step.run(s);
+    return s;
+  }
+};
+function escapeRegExp(s) {
+  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  ProfanityEngine,
+  collapseRepeats,
+  collapseWhitespace,
+  leetspeak,
+  lower,
+  nfkc,
+  stripPunctuation
+});
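
Worth noting from the engine internals: for a pack with code "en", normalizeForPack applies nfkc, lower, leetspeak, collapseRepeats(2), stripPunctuation, and collapseWhitespace (in that order) before any pack-supplied normalizers, and analyze scores as min(100, matches.length * 15 + maxSeverity * 10), so two matches with a top severity of 5 score 80. A sketch replaying the pipeline order with the exported steps (the input string is an arbitrary example):

import { nfkc, lower, leetspeak, collapseRepeats, stripPunctuation, collapseWhitespace } from "@shankarkharel/profanity-core";

const steps = [nfkc, lower, leetspeak, collapseRepeats(2), stripPunctuation, collapseWhitespace];
let s = "B@@@dWord!!!";
for (const step of steps) s = step.run(s);
// s === "baadwordii": leetspeak maps "@" -> "a" and "!" -> "i" before repeats
// are collapsed, so the trailing "!!!" folds into the word instead of being stripped.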
package/dist/index.mjs
ADDED
@@ -0,0 +1,194 @@
+// src/tokenize.ts
+function tokenizeWords(text) {
+  const tokens = [];
+  let i = 0;
+  while (i < text.length) {
+    while (i < text.length && text[i] === " ") i++;
+    const start = i;
+    while (i < text.length && text[i] !== " ") i++;
+    const end = i;
+    if (end > start) {
+      tokens.push({ value: text.slice(start, end), start, end });
+    }
+  }
+  return tokens;
+}
+
+// src/normalizers.ts
+var nfkc = {
+  name: "nfkc",
+  run: (s) => s.normalize("NFKC")
+};
+var lower = {
+  name: "lower",
+  run: (s) => s.toLowerCase()
+};
+var collapseWhitespace = {
+  name: "collapseWhitespace",
+  run: (s) => s.replace(/\s+/g, " ").trim()
+};
+var stripPunctuation = {
+  name: "stripPunctuation",
+  run: (s) => s.replace(/[^\p{L}\p{N}\u0900-\u097F ]+/gu, " ")
+};
+function collapseRepeats(maxRepeats = 2) {
+  return {
+    name: "collapseRepeats",
+    run: (s) => s.replace(/(.)\1{2,}/g, (_m, ch) => String(ch).repeat(maxRepeats))
+  };
+}
+var leetspeak = {
+  name: "leetspeak",
+  run: (s) => s.replace(/[@]/g, "a").replace(/[!]/g, "i").replace(/[0]/g, "o").replace(/[1|]/g, "i").replace(/[$]/g, "s")
+};
+
+// src/engine.ts
+function uniq(arr) {
+  return Array.from(new Set(arr));
+}
+function clampText(text, max) {
+  if (text.length <= max) return text;
+  return text.slice(0, max);
+}
+var ProfanityEngine = class {
+  constructor(packs, options) {
+    this.packs = [];
+    this.packs = packs;
+    this.opts = {
+      severityThreshold: options?.severityThreshold ?? 1,
+      enabledLanguages: options?.enabledLanguages ?? [],
+      extraTerms: options?.extraTerms ?? [],
+      extraAllowlist: options?.extraAllowlist ?? [],
+      enableRepeatCollapse: options?.enableRepeatCollapse ?? true,
+      maxTextLength: options?.maxTextLength ?? 2e4
+    };
+  }
+  analyze(input) {
+    const text = clampText(input ?? "", this.opts.maxTextLength);
+    const enabled = this.opts.enabledLanguages.length ? this.packs.filter((p) => this.opts.enabledLanguages.includes(p.code)) : this.packs;
+    const matches = [];
+    for (const pack of enabled) {
+      const normalized = this.normalizeForPack(text, pack);
+      const allow = new Set(uniq([...pack.allowlist ?? [], ...this.opts.extraAllowlist]).map((x) => this.normalizeForPack(x, pack)));
+      const allTerms = [...pack.terms, ...this.opts.extraTerms];
+      const expanded = this.expandTerms(allTerms);
+      const tokens = tokenizeWords(normalized);
+      const wordIndex = /* @__PURE__ */ new Map();
+      for (const t of expanded.filter((x) => (x.match ?? "word") === "word")) {
+        const key = this.normalizeForPack(t.term, pack);
+        if (!wordIndex.has(key)) wordIndex.set(key, []);
+        wordIndex.get(key).push(t);
+      }
+      for (const tok of tokens) {
+        if (allow.has(tok.value)) continue;
+        const hit = wordIndex.get(tok.value);
+        if (hit) {
+          for (const h of hit) {
+            if (h.severity < this.opts.severityThreshold) continue;
+            matches.push({
+              pack: pack.code,
+              term: h.term,
+              severity: h.severity,
+              category: h.category ?? [],
+              index: tok.start
+            });
+          }
+        }
+      }
+      const phrases = expanded.filter((x) => (x.match ?? "word") === "phrase");
+      for (const ph of phrases) {
+        const needle = this.normalizeForPack(ph.term, pack);
+        if (!needle || allow.has(needle)) continue;
+        let idx = normalized.indexOf(needle);
+        while (idx !== -1) {
+          if (ph.severity >= this.opts.severityThreshold) {
+            matches.push({
+              pack: pack.code,
+              term: ph.term,
+              severity: ph.severity,
+              category: ph.category ?? [],
+              index: idx
+            });
+          }
+          idx = normalized.indexOf(needle, idx + needle.length);
+        }
+      }
+    }
+    const maxSeverity = matches.length ? matches.reduce((m, x) => x.severity > m ? x.severity : m, 0) : 0;
+    const score = matches.length ? Math.min(100, matches.length * 15 + maxSeverity * 10) : 0;
+    return {
+      profane: matches.length > 0,
+      score,
+      maxSeverity,
+      matches
+    };
+  }
+  isProfane(text) {
+    return this.analyze(text).profane;
+  }
+  censor(text, options) {
+    const result = this.analyze(text);
+    if (!result.profane) return text;
+    const censorChar = options?.censorChar ?? "*";
+    const replaceWith = options?.replaceWith;
+    const preservePrefix = options?.preservePrefix ?? (options?.preserveFirstLast ? 1 : 0);
+    const preserveSuffix = options?.preserveSuffix ?? (options?.preserveFirstLast ? 1 : 0);
+    const makeMasked = (term) => {
+      if (replaceWith) return replaceWith;
+      const len = term.length;
+      const pre = Math.max(0, Math.min(preservePrefix, len));
+      const suf = Math.max(0, Math.min(preserveSuffix, len - pre));
+      let mid = Math.max(0, len - pre - suf);
+      if (len >= 2 && mid === 0) {
+        const safePre = Math.max(0, Math.min(pre, len - 1));
+        return term.slice(0, safePre) + censorChar + term.slice(safePre + 1);
+      }
+      return term.slice(0, pre) + censorChar.repeat(mid) + term.slice(len - suf);
+    };
+    let out = text;
+    for (const m of result.matches) {
+      const term = m.term;
+      if (!term) continue;
+      const replacement = makeMasked(term);
+      const re = new RegExp(escapeRegExp(term), "gi");
+      out = out.replace(re, replacement);
+    }
+    return out;
+  }
+  expandTerms(terms) {
+    const out = [];
+    for (const t of terms) {
+      out.push(t);
+      for (const v of t.variants ?? []) {
+        out.push({ ...t, term: v });
+      }
+    }
+    return out;
+  }
+  normalizeForPack(text, pack) {
+    const steps = [
+      nfkc,
+      lower,
+      ...pack.code === "en" ? [leetspeak] : [],
+      ...this.opts.enableRepeatCollapse ? [collapseRepeats(2)] : [],
+      stripPunctuation,
+      collapseWhitespace,
+      ...pack.normalizers ?? []
+    ];
+    let s = text;
+    for (const step of steps) s = step.run(s);
+    return s;
+  }
+};
+function escapeRegExp(s) {
+  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+export {
+  ProfanityEngine,
+  collapseRepeats,
+  collapseWhitespace,
+  leetspeak,
+  lower,
+  nfkc,
+  stripPunctuation
+};
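
The ESM build mirrors the CJS build above. One sketch of the extension points: extraTerms are merged into every enabled pack, extraAllowlist entries are normalized per pack, and pack-level normalizers run after the built-in steps. The "ne" pack and foldDanda step below are hypothetical:

import { ProfanityEngine, type LanguagePack, type NormalizerStep } from "@shankarkharel/profanity-core";

// Hypothetical extra step: stripPunctuation keeps \u0900-\u097F, so the
// Devanagari danda (U+0964) survives unless a pack normalizer removes it.
const foldDanda: NormalizerStep = { name: "foldDanda", run: (s) => s.replace(/\u0964/g, " ").trim() };

const ne: LanguagePack = { code: "ne", version: "0.1.0", terms: [], normalizers: [foldDanda] };
const engine = new ProfanityEngine([ne], {
  extraTerms: [{ term: "badword", severity: 4 }], // merged into the "ne" pack's term list
  extraAllowlist: ["scunthorpe"]
});
engine.isProfane("badword \u0964"); // true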
package/package.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "name": "@shankarkharel/profanity-core",
+  "version": "1.0.0",
+  "description": "Core profanity moderation engine (language pack based).",
+  "main": "dist/index.cjs",
+  "module": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.mjs",
+      "require": "./dist/index.cjs"
+    }
+  },
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "build": "tsup src/index.ts --format cjs,esm --dts --clean",
+    "test": "yarn build && node --test dist",
+    "lint": "echo \"(add eslint later)\""
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "dependencies": {},
+  "license": "MIT"
+}
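
Per the exports map, ESM consumers resolve ./dist/index.mjs and type consumers ./dist/index.d.ts. Note, though, that the file list at the top of this diff contains no dist/index.cjs, yet both "main" and the "require" condition point there; a CommonJS require() of this version would therefore likely fail to resolve, since the CJS build appears to have been emitted as dist/index.js instead. ESM usage, matching the published files:

import { ProfanityEngine } from "@shankarkharel/profanity-core"; // resolves ./dist/index.mjs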