@shankarkharel/profanity-lang-ne-rom 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +490 -0
- package/package.json +2 -2
package/README.md
ADDED
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
````md
|
|
4
|
+
# Profanity Filter (TypeScript)
|
|
5
|
+
|
|
6
|
+
A small profanity detection + censoring engine with language packs (English + Nepali Devanagari + Nepali Romanized), and a NestJS integration package.
|
|
7
|
+
|
|
8
|
+
## Packages (npm)
|
|
9
|
+
|
|
10
|
+
- `@shankarkharel/profanity-core` — core engine
|
|
11
|
+
- `@shankarkharel/profanity-lang-en` — English pack
|
|
12
|
+
- `@shankarkharel/profanity-lang-ne` — Nepali (Devanagari) pack
|
|
13
|
+
- `@shankarkharel/profanity-lang-ne-rom` — Nepali (Romanized) pack
|
|
14
|
+
- `@shankarkharel/profanity-nest` — NestJS integration
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
### Node / TypeScript
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-en
|
|
22
|
+
# optional Nepali packs
|
|
23
|
+
npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
|
|
24
|
+
````
|
|
25
|
+
|
|
26
|
+
### NestJS
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm i @shankarkharel/profanity-nest @shankarkharel/profanity-core @shankarkharel/profanity-lang-en
|
|
30
|
+
# optional
|
|
31
|
+
npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
import { ProfanityEngine } from "@shankarkharel/profanity-core";
|
|
38
|
+
import en from "@shankarkharel/profanity-lang-en";
|
|
39
|
+
import ne from "@shankarkharel/profanity-lang-ne";
|
|
40
|
+
import neRom from "@shankarkharel/profanity-lang-ne-rom";
|
|
41
|
+
|
|
42
|
+
const engine = new ProfanityEngine([en, ne, neRom], {
|
|
43
|
+
severityThreshold: 1,
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
const text = "this is crap and गधा and kutta";
|
|
47
|
+
console.log(engine.analyze(text));
|
|
48
|
+
console.log(engine.censor(text, { preserveFirstLast: true }));
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Links
|
|
52
|
+
|
|
53
|
+
* Repo: YOUR_REPO_URL
|
|
54
|
+
* Issues: YOUR_REPO_URL/issues
|
|
55
|
+
|
|
56
|
+
````
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## ✅ `packages/profanity-core/README.md`
|
|
61
|
+
|
|
62
|
+
```md
|
|
63
|
+
# @shankarkharel/profanity-core
|
|
64
|
+
|
|
65
|
+
Core profanity detection + censoring engine.
|
|
66
|
+
|
|
67
|
+
## Install
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
npm i @shankarkharel/profanity-core
|
|
71
|
+
````
|
|
72
|
+
|
|
73
|
+
You typically install at least one language pack too:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
npm i @shankarkharel/profanity-lang-en
|
|
77
|
+
# optional:
|
|
78
|
+
npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Usage
|
|
82
|
+
|
|
83
|
+
```ts
|
|
84
|
+
import { ProfanityEngine } from "@shankarkharel/profanity-core";
|
|
85
|
+
import en from "@shankarkharel/profanity-lang-en";
|
|
86
|
+
|
|
87
|
+
const engine = new ProfanityEngine([en]);
|
|
88
|
+
|
|
89
|
+
console.log(engine.isProfane("this is clean")); // false
|
|
90
|
+
console.log(engine.isProfane("this is crap")); // true
|
|
91
|
+
|
|
92
|
+
const result = engine.analyze("this is crap");
|
|
93
|
+
console.log(result.profane); // true
|
|
94
|
+
console.log(result.maxSeverity); // depends on pack
|
|
95
|
+
console.log(result.matches); // list of matches
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## API
|
|
99
|
+
|
|
100
|
+
### `new ProfanityEngine(packs, options?)`
|
|
101
|
+
|
|
102
|
+
```ts
|
|
103
|
+
const engine = new ProfanityEngine(packs, options);
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
* `packs: LanguagePack[]` — language packs to load (e.g. English, Nepali)
|
|
107
|
+
* `options?: EngineOptions`
|
|
108
|
+
|
|
109
|
+
#### EngineOptions
|
|
110
|
+
|
|
111
|
+
```ts
|
|
112
|
+
export interface EngineOptions {
|
|
113
|
+
severityThreshold?: 1 | 2 | 3 | 4 | 5; // default 1
|
|
114
|
+
enabledLanguages?: string[]; // if set, only these pack codes
|
|
115
|
+
extraTerms?: TermEntry[]; // app-specific extra terms
|
|
116
|
+
extraAllowlist?: string[]; // app-specific allowlist (never match)
|
|
117
|
+
enableRepeatCollapse?: boolean; // default true (collapses repeated chars)
|
|
118
|
+
maxTextLength?: number; // default 20_000 (safety)
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Examples:
|
|
123
|
+
|
|
124
|
+
```ts
|
|
125
|
+
// Only analyze English
|
|
126
|
+
const engine = new ProfanityEngine([en, ne, neRom], {
|
|
127
|
+
enabledLanguages: ["en"],
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
// Require stronger severity
|
|
131
|
+
const engine = new ProfanityEngine([en], {
|
|
132
|
+
severityThreshold: 3,
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
// Add your own terms
|
|
136
|
+
const engine = new ProfanityEngine([en], {
|
|
137
|
+
extraTerms: [
|
|
138
|
+
{ term: "dummybad", severity: 1, category: ["custom"], match: "word" },
|
|
139
|
+
{ term: "very bad phrase", severity: 3, category: ["custom"], match: "phrase" },
|
|
140
|
+
],
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
// Allow certain words
|
|
144
|
+
const engine = new ProfanityEngine([en], {
|
|
145
|
+
extraAllowlist: ["assistant", "class"],
|
|
146
|
+
});
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
### `engine.analyze(text): AnalyzeResult`
|
|
152
|
+
|
|
153
|
+
Analyzes text and returns details.
|
|
154
|
+
|
|
155
|
+
```ts
|
|
156
|
+
const res = engine.analyze("this is crap");
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
|
|
161
|
+
```ts
|
|
162
|
+
export interface AnalyzeResult {
|
|
163
|
+
profane: boolean;
|
|
164
|
+
score: number; // 0..100
|
|
165
|
+
maxSeverity: 0 | 1 | 2 | 3 | 4 | 5;
|
|
166
|
+
matches: MatchDetail[];
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
export interface MatchDetail {
|
|
170
|
+
pack: string; // language code (e.g. "en")
|
|
171
|
+
term: string; // canonical term from the pack
|
|
172
|
+
severity: 1 | 2 | 3 | 4 | 5;
|
|
173
|
+
category: string[];
|
|
174
|
+
index: number; // char index in normalized text (best-effort)
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
### `engine.isProfane(text): boolean`
|
|
181
|
+
|
|
182
|
+
Convenience wrapper:
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
engine.isProfane("hello"); // false
|
|
186
|
+
engine.isProfane("this is crap"); // true
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
### `engine.censor(text, options?): string`
|
|
192
|
+
|
|
193
|
+
Censors matched terms in the **original text** (best-effort replacement).
|
|
194
|
+
|
|
195
|
+
```ts
|
|
196
|
+
engine.censor("this is crap");
|
|
197
|
+
// "this is ****" (depends on term length)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
#### Censor options
|
|
201
|
+
|
|
202
|
+
The following options are supported:
|
|
203
|
+
|
|
204
|
+
* `censorChar?: string` — default `"*"`
|
|
205
|
+
* `replaceWith?: string` — if provided, replaces term with this fixed token
|
|
206
|
+
* `preserveFirstLast?: boolean` — legacy shortcut (preserve 1 prefix + 1 suffix)
|
|
207
|
+
* `preservePrefix?: number` — keep first N characters
|
|
208
|
+
* `preserveSuffix?: number` — keep last N characters
|
|
209
|
+
|
|
210
|
+
Examples:
|
|
211
|
+
|
|
212
|
+
```ts
|
|
213
|
+
engine.censor("this is crap", { censorChar: "#" });
|
|
214
|
+
// "this is ####"
|
|
215
|
+
|
|
216
|
+
engine.censor("this is crap", { preserveFirstLast: true });
|
|
217
|
+
// "this is c**p"
|
|
218
|
+
|
|
219
|
+
engine.censor("this is crap", { preservePrefix: 2, preserveSuffix: 1 });
|
|
220
|
+
// "this is cr*p"
|
|
221
|
+
|
|
222
|
+
engine.censor("this is crap", { replaceWith: "[censored]" });
|
|
223
|
+
// "this is [censored]"
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Matching behavior
|
|
229
|
+
|
|
230
|
+
Each term has `match`:
|
|
231
|
+
|
|
232
|
+
* `"word"` (default): token-based word matching after normalization
|
|
233
|
+
* `"phrase"`: substring matching on normalized text
|
|
234
|
+
|
|
235
|
+
> Important: `index` returned is on **normalized text**, and censoring currently does a best-effort replace in the original text (regex replace of matched canonical terms). For very advanced use (precise original indices), a future improvement would map normalized indices to original indices.
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## Normalization pipeline
|
|
240
|
+
|
|
241
|
+
Before matching, text is normalized with:
|
|
242
|
+
|
|
243
|
+
* NFKC normalization
|
|
244
|
+
* lowercasing
|
|
245
|
+
* (English only) leetspeak normalization
|
|
246
|
+
* (optional) repeat collapse (default enabled)
|
|
247
|
+
* punctuation stripping
|
|
248
|
+
* whitespace collapse
|
|
249
|
+
* plus any `pack.normalizers` you provide
|
|
250
|
+
|
|
251
|
+
This helps catch variations like repeated letters, extra punctuation, etc.
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## Creating a custom Language Pack
|
|
256
|
+
|
|
257
|
+
A language pack is:
|
|
258
|
+
|
|
259
|
+
```ts
|
|
260
|
+
export interface LanguagePack {
|
|
261
|
+
code: string; // "en", "ne", "ne-rom"
|
|
262
|
+
version: string;
|
|
263
|
+
terms: TermEntry[];
|
|
264
|
+
allowlist?: string[];
|
|
265
|
+
normalizers?: NormalizerStep[];
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
export interface TermEntry {
|
|
269
|
+
term: string; // canonical form
|
|
270
|
+
severity: 1 | 2 | 3 | 4 | 5; // 1 mild ... 5 extreme
|
|
271
|
+
category?: string[]; // e.g. ["insult", "sexual", "slur"]
|
|
272
|
+
match?: "word" | "phrase"; // default "word"
|
|
273
|
+
variants?: string[]; // additional spellings/romanizations
|
|
274
|
+
}
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
Example pack:
|
|
278
|
+
|
|
279
|
+
```ts
|
|
280
|
+
import type { LanguagePack } from "@shankarkharel/profanity-core";
|
|
281
|
+
|
|
282
|
+
const myPack: LanguagePack = {
|
|
283
|
+
code: "my-lang",
|
|
284
|
+
version: "1.0.0",
|
|
285
|
+
allowlist: ["assistant"],
|
|
286
|
+
terms: [
|
|
287
|
+
{ term: "badword", severity: 3, category: ["general"], match: "word" },
|
|
288
|
+
{ term: "very bad phrase", severity: 4, category: ["general"], match: "phrase" },
|
|
289
|
+
{ term: "kutta", severity: 3, category: ["insult"], variants: ["kuttaaa"] },
|
|
290
|
+
],
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
export default myPack;
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Then:
|
|
297
|
+
|
|
298
|
+
```ts
|
|
299
|
+
const engine = new ProfanityEngine([myPack]);
|
|
300
|
+
engine.analyze("badword here");
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## License
|
|
306
|
+
|
|
307
|
+
MIT (or your chosen license)
|
|
308
|
+
|
|
309
|
+
````
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## ✅ `packages/profanity-lang-en/README.md`
|
|
314
|
+
|
|
315
|
+
```md
|
|
316
|
+
# @shankarkharel/profanity-lang-en
|
|
317
|
+
|
|
318
|
+
English profanity language pack for `@shankarkharel/profanity-core`.
|
|
319
|
+
|
|
320
|
+
## Install
|
|
321
|
+
|
|
322
|
+
```bash
|
|
323
|
+
npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-en
|
|
324
|
+
````
|
|
325
|
+
|
|
326
|
+
## Usage
|
|
327
|
+
|
|
328
|
+
```ts
|
|
329
|
+
import { ProfanityEngine } from "@shankarkharel/profanity-core";
|
|
330
|
+
import en from "@shankarkharel/profanity-lang-en";
|
|
331
|
+
|
|
332
|
+
const engine = new ProfanityEngine([en]);
|
|
333
|
+
|
|
334
|
+
console.log(engine.analyze("this is crap"));
|
|
335
|
+
console.log(engine.censor("this is crap", { preserveFirstLast: true }));
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
## Notes
|
|
339
|
+
|
|
340
|
+
* Matching is normalized (lowercase, punctuation stripped, whitespace collapsed).
|
|
341
|
+
* English also applies leetspeak normalization.
|
|
342
|
+
|
|
343
|
+
````
|
|
344
|
+
|
|
345
|
+
---
|
|
346
|
+
|
|
347
|
+
## ✅ `packages/profanity-lang-ne/README.md`
|
|
348
|
+
|
|
349
|
+
```md
|
|
350
|
+
# @shankarkharel/profanity-lang-ne
|
|
351
|
+
|
|
352
|
+
Nepali profanity language pack (Devanagari) for `@shankarkharel/profanity-core`.
|
|
353
|
+
|
|
354
|
+
## Install
|
|
355
|
+
|
|
356
|
+
```bash
|
|
357
|
+
npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-ne
|
|
358
|
+
````
|
|
359
|
+
|
|
360
|
+
## Usage
|
|
361
|
+
|
|
362
|
+
```ts
|
|
363
|
+
import { ProfanityEngine } from "@shankarkharel/profanity-core";
|
|
364
|
+
import ne from "@shankarkharel/profanity-lang-ne";
|
|
365
|
+
|
|
366
|
+
const engine = new ProfanityEngine([ne]);
|
|
367
|
+
|
|
368
|
+
const text = "तँ गधा हो?";
|
|
369
|
+
console.log(engine.analyze(text));
|
|
370
|
+
console.log(engine.censor(text, { replaceWith: "[censored]" }));
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
## Tips
|
|
374
|
+
|
|
375
|
+
* This pack targets Nepali written in **Devanagari**.
|
|
376
|
+
* For Romanized Nepali (e.g. `kutta`, `sale`) use `@shankarkharel/profanity-lang-ne-rom`.
|
|
377
|
+
|
|
378
|
+
````
|
|
379
|
+
|
|
380
|
+
---
|
|
381
|
+
|
|
382
|
+
## ✅ `packages/profanity-lang-ne-rom/README.md`
|
|
383
|
+
|
|
384
|
+
```md
|
|
385
|
+
# @shankarkharel/profanity-lang-ne-rom
|
|
386
|
+
|
|
387
|
+
Nepali profanity language pack (Romanized) for `@shankarkharel/profanity-core`.
|
|
388
|
+
|
|
389
|
+
## Install
|
|
390
|
+
|
|
391
|
+
```bash
|
|
392
|
+
npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-ne-rom
|
|
393
|
+
````
|
|
394
|
+
|
|
395
|
+
## Usage
|
|
396
|
+
|
|
397
|
+
```ts
|
|
398
|
+
import { ProfanityEngine } from "@shankarkharel/profanity-core";
|
|
399
|
+
import neRom from "@shankarkharel/profanity-lang-ne-rom";
|
|
400
|
+
|
|
401
|
+
const engine = new ProfanityEngine([neRom]);
|
|
402
|
+
|
|
403
|
+
const text = "kutta and sale";
|
|
404
|
+
console.log(engine.analyze(text));
|
|
405
|
+
console.log(engine.censor(text, { preservePrefix: 1, preserveSuffix: 1 }));
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Tips
|
|
409
|
+
|
|
410
|
+
* This pack targets Nepali profanity written in **Latin/Roman letters**.
|
|
411
|
+
* For Devanagari Nepali use `@shankarkharel/profanity-lang-ne`.
|
|
412
|
+
|
|
413
|
+
````
|
|
414
|
+
|
|
415
|
+
---
|
|
416
|
+
|
|
417
|
+
## ✅ `packages/profanity-nest/README.md` (NestJS)
|
|
418
|
+
|
|
419
|
+
**Note:** The examples below assume the package exports `ProfanityModule` and `ProfanityService`. If your Nest package exports different names, check `packages/profanity-nest/src/index.ts` and adjust the imports accordingly.
|
|
420
|
+
|
|
421
|
+
```md
|
|
422
|
+
# @shankarkharel/profanity-nest
|
|
423
|
+
|
|
424
|
+
NestJS integration for `@shankarkharel/profanity-core`.
|
|
425
|
+
|
|
426
|
+
## Install
|
|
427
|
+
|
|
428
|
+
```bash
|
|
429
|
+
npm i @shankarkharel/profanity-nest @shankarkharel/profanity-core
|
|
430
|
+
npm i @shankarkharel/profanity-lang-en
|
|
431
|
+
# optional:
|
|
432
|
+
npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
|
|
433
|
+
````
|
|
434
|
+
|
|
435
|
+
## Setup
|
|
436
|
+
|
|
437
|
+
```ts
|
|
438
|
+
import { Module } from "@nestjs/common";
|
|
439
|
+
import { ProfanityModule } from "@shankarkharel/profanity-nest";
|
|
440
|
+
|
|
441
|
+
import en from "@shankarkharel/profanity-lang-en";
|
|
442
|
+
import ne from "@shankarkharel/profanity-lang-ne";
|
|
443
|
+
import neRom from "@shankarkharel/profanity-lang-ne-rom";
|
|
444
|
+
|
|
445
|
+
@Module({
|
|
446
|
+
imports: [
|
|
447
|
+
ProfanityModule.forRoot({
|
|
448
|
+
packs: [en, ne, neRom],
|
|
449
|
+
options: {
|
|
450
|
+
severityThreshold: 1,
|
|
451
|
+
},
|
|
452
|
+
}),
|
|
453
|
+
],
|
|
454
|
+
})
|
|
455
|
+
export class AppModule {}
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
## Use in a service/controller
|
|
459
|
+
|
|
460
|
+
```ts
|
|
461
|
+
import { Controller, Get } from "@nestjs/common";
|
|
462
|
+
import { ProfanityService } from "@shankarkharel/profanity-nest";
|
|
463
|
+
|
|
464
|
+
@Controller()
|
|
465
|
+
export class AppController {
|
|
466
|
+
constructor(private readonly profanity: ProfanityService) {}
|
|
467
|
+
|
|
468
|
+
@Get("check")
|
|
469
|
+
check() {
|
|
470
|
+
const text = "this is crap and kutta";
|
|
471
|
+
|
|
472
|
+
return {
|
|
473
|
+
analysis: this.profanity.analyze(text),
|
|
474
|
+
censored: this.profanity.censor(text, { preserveFirstLast: true }),
|
|
475
|
+
profane: this.profanity.isProfane(text),
|
|
476
|
+
};
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
## What it does
|
|
482
|
+
|
|
483
|
+
* Provides a singleton `ProfanityEngine` configured with packs/options
|
|
484
|
+
* Exposes `analyze`, `isProfane`, `censor` via injectable service
|
|
485
|
+
|
|
486
|
+
## License
|
|
487
|
+
|
|
488
|
+
MIT
|
|
489
|
+
|
|
490
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shankarkharel/profanity-lang-ne-rom",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"main": "dist/index.cjs",
|
|
5
5
|
"module": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
"lint": "echo \"(add eslint later)\""
|
|
23
23
|
},
|
|
24
24
|
"dependencies": {
|
|
25
|
-
"@shankarkharel/profanity-core": "
|
|
25
|
+
"@shankarkharel/profanity-core": "3.0.0"
|
|
26
26
|
},
|
|
27
27
|
"publishConfig": {
|
|
28
28
|
"access": "public"
|