@shankarkharel/profanity-lang-ne-rom 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/README.md +490 -0
  2. package/package.json +2 -2
package/README.md ADDED
@@ -0,0 +1,490 @@
1
+
2
+
3
+
4
+ # Profanity Filter (TypeScript)
5
+
6
+ A small profanity detection + censoring engine with language packs (English + Nepali Devanagari + Nepali Romanized), and a NestJS integration package.
7
+
8
+ ## Packages (npm)
9
+
10
+ - `@shankarkharel/profanity-core` — core engine
11
+ - `@shankarkharel/profanity-lang-en` — English pack
12
+ - `@shankarkharel/profanity-lang-ne` — Nepali (Devanagari) pack
13
+ - `@shankarkharel/profanity-lang-ne-rom` — Nepali (Romanized) pack
14
+ - `@shankarkharel/profanity-nest` — NestJS integration
15
+
16
+ ## Install
17
+
18
+ ### Node / TypeScript
19
+
20
+ ```bash
21
+ npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-en
22
+ # optional Nepali packs
23
+ npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
24
+ ````
25
+
26
+ ### NestJS
27
+
28
+ ```bash
29
+ npm i @shankarkharel/profanity-nest @shankarkharel/profanity-core @shankarkharel/profanity-lang-en
30
+ # optional
31
+ npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ ```ts
37
+ import { ProfanityEngine } from "@shankarkharel/profanity-core";
38
+ import en from "@shankarkharel/profanity-lang-en";
39
+ import ne from "@shankarkharel/profanity-lang-ne";
40
+ import neRom from "@shankarkharel/profanity-lang-ne-rom";
41
+
42
+ const engine = new ProfanityEngine([en, ne, neRom], {
43
+ severityThreshold: 1,
44
+ });
45
+
46
+ const text = "this is crap and गधा and kutta";
47
+ console.log(engine.analyze(text));
48
+ console.log(engine.censor(text, { preserveFirstLast: true }));
49
+ ```
50
+
51
+ ## Links
52
+
53
+ * Repo: YOUR_REPO_URL
54
+ * Issues: YOUR_REPO_URL/issues
55
+
56
+
57
+
58
+ ---
59
+
60
+ ## ✅ `packages/profanity-core/README.md`
61
+
62
+
63
+ # @shankarkharel/profanity-core
64
+
65
+ Core profanity detection + censoring engine.
66
+
67
+ ## Install
68
+
69
+ ```bash
70
+ npm i @shankarkharel/profanity-core
71
+ ````
72
+
73
+ You typically install at least one language pack too:
74
+
75
+ ```bash
76
+ npm i @shankarkharel/profanity-lang-en
77
+ # optional:
78
+ npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
79
+ ```
80
+
81
+ ## Usage
82
+
83
+ ```ts
84
+ import { ProfanityEngine } from "@shankarkharel/profanity-core";
85
+ import en from "@shankarkharel/profanity-lang-en";
86
+
87
+ const engine = new ProfanityEngine([en]);
88
+
89
+ console.log(engine.isProfane("this is clean")); // false
90
+ console.log(engine.isProfane("this is crap")); // true
91
+
92
+ const result = engine.analyze("this is crap");
93
+ console.log(result.profane); // true
94
+ console.log(result.maxSeverity); // depends on pack
95
+ console.log(result.matches); // list of matches
96
+ ```
97
+
98
+ ## API
99
+
100
+ ### `new ProfanityEngine(packs, options?)`
101
+
102
+ ```ts
103
+ const engine = new ProfanityEngine(packs, options);
104
+ ```
105
+
106
+ * `packs: LanguagePack[]` — language packs to load (e.g. English, Nepali)
107
+ * `options?: EngineOptions`
108
+
109
+ #### EngineOptions
110
+
111
+ ```ts
112
+ export interface EngineOptions {
113
+ severityThreshold?: 1 | 2 | 3 | 4 | 5; // default 1
114
+ enabledLanguages?: string[]; // if set, only these pack codes
115
+ extraTerms?: TermEntry[]; // app-specific extra terms
116
+ extraAllowlist?: string[]; // app-specific allowlist (never match)
117
+ enableRepeatCollapse?: boolean; // default true (collapses repeated chars)
118
+ maxTextLength?: number; // default 20_000 (safety)
119
+ }
120
+ ```
121
+
122
+ Examples:
123
+
124
+ ```ts
125
+ // Only analyze English
126
+ const engine = new ProfanityEngine([en, ne, neRom], {
127
+ enabledLanguages: ["en"],
128
+ });
129
+
130
+ // Require stronger severity
131
+ const engine = new ProfanityEngine([en], {
132
+ severityThreshold: 3,
133
+ });
134
+
135
+ // Add your own terms
136
+ const engine = new ProfanityEngine([en], {
137
+ extraTerms: [
138
+ { term: "dummybad", severity: 1, category: ["custom"], match: "word" },
139
+ { term: "very bad phrase", severity: 3, category: ["custom"], match: "phrase" },
140
+ ],
141
+ });
142
+
143
+ // Allow certain words
144
+ const engine = new ProfanityEngine([en], {
145
+ extraAllowlist: ["assistant", "class"],
146
+ });
147
+ ```
148
+
149
+ ---
150
+
151
+ ### `engine.analyze(text): AnalyzeResult`
152
+
153
+ Analyzes text and returns details.
154
+
155
+ ```ts
156
+ const res = engine.analyze("this is crap");
157
+ ```
158
+
159
+ Returns:
160
+
161
+ ```ts
162
+ export interface AnalyzeResult {
163
+ profane: boolean;
164
+ score: number; // 0..100
165
+ maxSeverity: 0 | 1 | 2 | 3 | 4 | 5;
166
+ matches: MatchDetail[];
167
+ }
168
+
169
+ export interface MatchDetail {
170
+ pack: string; // language code (e.g. "en")
171
+ term: string; // canonical term from the pack
172
+ severity: 1 | 2 | 3 | 4 | 5;
173
+ category: string[];
174
+ index: number; // char index in normalized text (best-effort)
175
+ }
176
+ ```
177
+
178
+ ---
179
+
180
+ ### `engine.isProfane(text): boolean`
181
+
182
+ Convenience wrapper:
183
+
184
+ ```ts
185
+ engine.isProfane("hello"); // false
186
+ engine.isProfane("this is crap"); // true
187
+ ```
188
+
189
+ ---
190
+
191
+ ### `engine.censor(text, options?): string`
192
+
193
+ Censors matched terms in the **original text** (best-effort replacement).
194
+
195
+ ```ts
196
+ engine.censor("this is crap");
197
+ // "this is ****" (depends on term length)
198
+ ```
199
+
200
+ #### Censor options
201
+
202
+ The following options are supported:
203
+
204
+ * `censorChar?: string` — default `"*"`
205
+ * `replaceWith?: string` — if provided, replaces term with this fixed token
206
+ * `preserveFirstLast?: boolean` — legacy shortcut (preserve 1 prefix + 1 suffix)
207
+ * `preservePrefix?: number` — keep first N characters
208
+ * `preserveSuffix?: number` — keep last N characters
209
+
210
+ Examples:
211
+
212
+ ```ts
213
+ engine.censor("this is crap", { censorChar: "#" });
214
+ // "this is ####"
215
+
216
+ engine.censor("this is crap", { preserveFirstLast: true });
217
+ // "this is c**p"
218
+
219
+ engine.censor("this is crap", { preservePrefix: 2, preserveSuffix: 1 });
220
+ // "this is cr*p"
221
+
222
+ engine.censor("this is crap", { replaceWith: "[censored]" });
223
+ // "this is [censored]"
224
+ ```
225
+
226
+ ---
227
+
228
+ ## Matching behavior
229
+
230
+ Each term has `match`:
231
+
232
+ * `"word"` (default): token-based word matching after normalization
233
+ * `"phrase"`: substring matching on normalized text
234
+
235
+ > **Important:** the returned `index` refers to the **normalized text**, not the original input. Censoring is currently a best-effort replacement in the original text (a regex replace of the matched canonical terms). Precise original indices are not available yet; a future improvement would map normalized indices back to original indices.
236
+
237
+ ---
238
+
239
+ ## Normalization pipeline
240
+
241
+ Before matching, text is normalized with:
242
+
243
+ * NFKC normalization
244
+ * lowercasing
245
+ * (English only) leetspeak normalization
246
+ * (optional) repeat collapse (default enabled)
247
+ * punctuation stripping
248
+ * whitespace collapse
249
+ * plus any `pack.normalizers` you provide
250
+
251
+ This helps catch variations like repeated letters, extra punctuation, etc.
252
+
253
+ ---
254
+
255
+ ## Creating a custom Language Pack
256
+
257
+ A language pack is:
258
+
259
+ ```ts
260
+ export interface LanguagePack {
261
+ code: string; // "en", "ne", "ne-rom"
262
+ version: string;
263
+ terms: TermEntry[];
264
+ allowlist?: string[];
265
+ normalizers?: NormalizerStep[];
266
+ }
267
+
268
+ export interface TermEntry {
269
+ term: string; // canonical form
270
+ severity: 1 | 2 | 3 | 4 | 5; // 1 mild ... 5 extreme
271
+ category?: string[]; // e.g. ["insult", "sexual", "slur"]
272
+ match?: "word" | "phrase"; // default "word"
273
+ variants?: string[]; // additional spellings/romanizations
274
+ }
275
+ ```
276
+
277
+ Example pack:
278
+
279
+ ```ts
280
+ import type { LanguagePack } from "@shankarkharel/profanity-core";
281
+
282
+ const myPack: LanguagePack = {
283
+ code: "my-lang",
284
+ version: "1.0.0",
285
+ allowlist: ["assistant"],
286
+ terms: [
287
+ { term: "badword", severity: 3, category: ["general"], match: "word" },
288
+ { term: "very bad phrase", severity: 4, category: ["general"], match: "phrase" },
289
+ { term: "kutta", severity: 3, category: ["insult"], variants: ["kuttaaa"] },
290
+ ],
291
+ };
292
+
293
+ export default myPack;
294
+ ```
295
+
296
+ Then:
297
+
298
+ ```ts
299
+ const engine = new ProfanityEngine([myPack]);
300
+ engine.analyze("badword here");
301
+ ```
302
+
303
+ ---
304
+
305
+ ## License
306
+
307
+ MIT
308
+
309
+
310
+
311
+ ---
312
+
313
+ ## ✅ `packages/profanity-lang-en/README.md`
314
+
315
+
316
+ # @shankarkharel/profanity-lang-en
317
+
318
+ English profanity language pack for `@shankarkharel/profanity-core`.
319
+
320
+ ## Install
321
+
322
+ ```bash
323
+ npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-en
324
+ ````
325
+
326
+ ## Usage
327
+
328
+ ```ts
329
+ import { ProfanityEngine } from "@shankarkharel/profanity-core";
330
+ import en from "@shankarkharel/profanity-lang-en";
331
+
332
+ const engine = new ProfanityEngine([en]);
333
+
334
+ console.log(engine.analyze("this is crap"));
335
+ console.log(engine.censor("this is crap", { preserveFirstLast: true }));
336
+ ```
337
+
338
+ ## Notes
339
+
340
+ * Matching is normalized (lowercase, punctuation stripped, whitespace collapsed).
341
+ * English also applies leetspeak normalization.
342
+
343
+
344
+
345
+ ---
346
+
347
+ ## ✅ `packages/profanity-lang-ne/README.md`
348
+
349
+
350
+ # @shankarkharel/profanity-lang-ne
351
+
352
+ Nepali profanity language pack (Devanagari) for `@shankarkharel/profanity-core`.
353
+
354
+ ## Install
355
+
356
+ ```bash
357
+ npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-ne
358
+ ````
359
+
360
+ ## Usage
361
+
362
+ ```ts
363
+ import { ProfanityEngine } from "@shankarkharel/profanity-core";
364
+ import ne from "@shankarkharel/profanity-lang-ne";
365
+
366
+ const engine = new ProfanityEngine([ne]);
367
+
368
+ const text = "तँ गधा हो?";
369
+ console.log(engine.analyze(text));
370
+ console.log(engine.censor(text, { replaceWith: "[censored]" }));
371
+ ```
372
+
373
+ ## Tips
374
+
375
+ * This pack targets Nepali written in **Devanagari**.
376
+ * For Romanized Nepali (e.g. `kutta`, `sale`) use `@shankarkharel/profanity-lang-ne-rom`.
377
+
378
+
379
+
380
+ ---
381
+
382
+ ## ✅ `packages/profanity-lang-ne-rom/README.md`
383
+
384
+
385
+ # @shankarkharel/profanity-lang-ne-rom
386
+
387
+ Nepali profanity language pack (Romanized) for `@shankarkharel/profanity-core`.
388
+
389
+ ## Install
390
+
391
+ ```bash
392
+ npm i @shankarkharel/profanity-core @shankarkharel/profanity-lang-ne-rom
393
+ ````
394
+
395
+ ## Usage
396
+
397
+ ```ts
398
+ import { ProfanityEngine } from "@shankarkharel/profanity-core";
399
+ import neRom from "@shankarkharel/profanity-lang-ne-rom";
400
+
401
+ const engine = new ProfanityEngine([neRom]);
402
+
403
+ const text = "kutta and sale";
404
+ console.log(engine.analyze(text));
405
+ console.log(engine.censor(text, { preservePrefix: 1, preserveSuffix: 1 }));
406
+ ```
407
+
408
+ ## Tips
409
+
410
+ * This pack targets Nepali profanity written in **Latin/Roman letters**.
411
+ * For Devanagari Nepali use `@shankarkharel/profanity-lang-ne`.
412
+
413
+
414
+
415
+ ---
416
+
417
+ ## ✅ `packages/profanity-nest/README.md` (NestJS)
418
+
419
+ **Note:** The examples below assume the package exports `ProfanityModule` and `ProfanityService`. If your installed version exports different names, check `packages/profanity-nest/src/index.ts` and adjust the imports accordingly.
420
+
421
+
422
+ # @shankarkharel/profanity-nest
423
+
424
+ NestJS integration for `@shankarkharel/profanity-core`.
425
+
426
+ ## Install
427
+
428
+ ```bash
429
+ npm i @shankarkharel/profanity-nest @shankarkharel/profanity-core
430
+ npm i @shankarkharel/profanity-lang-en
431
+ # optional:
432
+ npm i @shankarkharel/profanity-lang-ne @shankarkharel/profanity-lang-ne-rom
433
+ ````
434
+
435
+ ## Setup
436
+
437
+ ```ts
438
+ import { Module } from "@nestjs/common";
439
+ import { ProfanityModule } from "@shankarkharel/profanity-nest";
440
+
441
+ import en from "@shankarkharel/profanity-lang-en";
442
+ import ne from "@shankarkharel/profanity-lang-ne";
443
+ import neRom from "@shankarkharel/profanity-lang-ne-rom";
444
+
445
+ @Module({
446
+ imports: [
447
+ ProfanityModule.forRoot({
448
+ packs: [en, ne, neRom],
449
+ options: {
450
+ severityThreshold: 1,
451
+ },
452
+ }),
453
+ ],
454
+ })
455
+ export class AppModule {}
456
+ ```
457
+
458
+ ## Use in a service/controller
459
+
460
+ ```ts
461
+ import { Controller, Get } from "@nestjs/common";
462
+ import { ProfanityService } from "@shankarkharel/profanity-nest";
463
+
464
+ @Controller()
465
+ export class AppController {
466
+ constructor(private readonly profanity: ProfanityService) {}
467
+
468
+ @Get("check")
469
+ check() {
470
+ const text = "this is crap and kutta";
471
+
472
+ return {
473
+ analysis: this.profanity.analyze(text),
474
+ censored: this.profanity.censor(text, { preserveFirstLast: true }),
475
+ profane: this.profanity.isProfane(text),
476
+ };
477
+ }
478
+ }
479
+ ```
480
+
481
+ ## What it does
482
+
483
+ * Provides a singleton `ProfanityEngine` configured with packs/options
484
+ * Exposes `analyze`, `isProfane`, `censor` via injectable service
485
+
486
+ ## License
487
+
488
+ MIT
489
+
490
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shankarkharel/profanity-lang-ne-rom",
3
- "version": "1.0.0",
3
+ "version": "3.0.0",
4
4
  "main": "dist/index.cjs",
5
5
  "module": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -22,7 +22,7 @@
22
22
  "lint": "echo \"(add eslint later)\""
23
23
  },
24
24
  "dependencies": {
25
- "@shankarkharel/profanity-core": "1.0.0"
25
+ "@shankarkharel/profanity-core": "3.0.0"
26
26
  },
27
27
  "publishConfig": {
28
28
  "access": "public"