allprofanity 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -25
- package/allprofanity.config.example.json +6 -0
- package/bin/init.js +1 -1
- package/bin/mcp.js +6 -0
- package/config.schema.json +44 -0
- package/dist/algos/aho-corasick.d.ts +11 -1
- package/dist/algos/aho-corasick.js +31 -6
- package/dist/algos/aho-corasick.js.map +1 -1
- package/dist/algos/bloom-filter.d.ts +2 -2
- package/dist/algos/bloom-filter.js +6 -6
- package/dist/algos/bloom-filter.js.map +1 -1
- package/dist/index.d.ts +131 -11
- package/dist/index.js +736 -126
- package/dist/index.js.map +1 -1
- package/dist/languages/hindi-words.js +2 -2
- package/dist/languages/hindi-words.js.map +1 -1
- package/dist/mcp/server.d.ts +30 -0
- package/dist/mcp/server.js +364 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/stdio.d.ts +1 -0
- package/dist/mcp/stdio.js +72 -0
- package/dist/mcp/stdio.js.map +1 -0
- package/examples-config/README.md +113 -0
- package/examples-config/chat-app.json +24 -0
- package/examples-config/content-moderation.json +42 -0
- package/examples-config/family-friendly-max.json +33 -0
- package/examples-config/high-throughput-api.json +29 -0
- package/examples-config/low-latency-minimal.json +24 -0
- package/examples-config/medical-professional.json +42 -0
- package/examples-config/multilingual-global.json +33 -0
- package/package.json +17 -7
package/dist/index.d.ts
CHANGED
|
@@ -255,6 +255,54 @@ export interface AllProfanityOptions {
|
|
|
255
255
|
*/
|
|
256
256
|
scoreThreshold?: number;
|
|
257
257
|
};
|
|
258
|
+
/**
|
|
259
|
+
* Evasion-protection configuration. All passes are enabled by default and
|
|
260
|
+
* only run when their trigger characters are present in the text, so they
|
|
261
|
+
* add near-zero cost on ordinary input.
|
|
262
|
+
*/
|
|
263
|
+
evasionProtection?: {
|
|
264
|
+
/**
|
|
265
|
+
* Fold unicode evasion: fullwidth forms (ｆｕｃｋ), Cyrillic/Greek
|
|
266
|
+
* homoglyphs (fυck), diacritics (fück) and invisible characters
|
|
267
|
+
* (zero-width spaces, soft hyphens) injected inside words.
|
|
268
|
+
*
|
|
269
|
+
* @default true
|
|
270
|
+
*/
|
|
271
|
+
unicode?: boolean;
|
|
272
|
+
/**
|
|
273
|
+
* Collapse stretched characters ("fuuuuck" -> "fuck"). Only triggers when
|
|
274
|
+
* a run of 3+ identical characters exists.
|
|
275
|
+
*
|
|
276
|
+
* @default true
|
|
277
|
+
*/
|
|
278
|
+
repeatedCharacters?: boolean;
|
|
279
|
+
/**
|
|
280
|
+
* Resolve masked characters as single-character wildcards ("f*ck",
|
|
281
|
+
* "f#ck", "f@ck"). A masked token only matches when the visible letters
|
|
282
|
+
* align exactly with a dictionary word.
|
|
283
|
+
*
|
|
284
|
+
* @default true
|
|
285
|
+
*/
|
|
286
|
+
maskedCharacters?: boolean;
|
|
287
|
+
/**
|
|
288
|
+
* Detect words spelled out with uniform single separators
|
|
289
|
+
* ("f u c k", "f.u.c.k"). The joined letters must equal a dictionary
|
|
290
|
+
* word exactly, which keeps initialisms like "U S A" clean.
|
|
291
|
+
*
|
|
292
|
+
* @default true
|
|
293
|
+
*/
|
|
294
|
+
separatedLetters?: boolean;
|
|
295
|
+
/**
|
|
296
|
+
* Detect unambiguous profanity stems embedded inside larger tokens
|
|
297
|
+
* ("sisfuck", "totalshitshow"). Applies only to a curated list of
|
|
298
|
+
* strong words that never occur in legitimate vocabulary, with built-in
|
|
299
|
+
* exceptions (Scunthorpe, mishit, snigger, ...), so "classic", "bass"
|
|
300
|
+
* and "Hitchcock" stay clean.
|
|
301
|
+
*
|
|
302
|
+
* @default true
|
|
303
|
+
*/
|
|
304
|
+
embeddedWords?: boolean;
|
|
305
|
+
};
|
|
258
306
|
/**
|
|
259
307
|
* Performance optimization configuration.
|
|
260
308
|
*/
|
|
@@ -291,6 +339,8 @@ export interface AllProfanityOptions {
|
|
|
291
339
|
* ```
|
|
292
340
|
*/
|
|
293
341
|
export declare enum ProfanitySeverity {
|
|
342
|
+
/** No profanity detected */
|
|
343
|
+
NONE = 0,
|
|
294
344
|
/** Mild profanity: 1 unique word or 1 total match */
|
|
295
345
|
MILD = 1,
|
|
296
346
|
/** Moderate profanity: 2 unique words or 2 total matches */
|
|
@@ -461,14 +511,22 @@ export declare class AllProfanity {
|
|
|
461
511
|
private caseSensitive;
|
|
462
512
|
private strictMode;
|
|
463
513
|
private detectPartialWords;
|
|
514
|
+
private evasionUnicode;
|
|
515
|
+
private evasionRepeatedChars;
|
|
516
|
+
private evasionMaskedChars;
|
|
517
|
+
private evasionSeparatedLetters;
|
|
518
|
+
private evasionEmbeddedWords;
|
|
464
519
|
private readonly availableLanguages;
|
|
465
520
|
private readonly leetMappings;
|
|
466
521
|
private readonly dynamicWords;
|
|
467
522
|
private ahoCorasickAutomaton;
|
|
468
523
|
private bloomFilter;
|
|
469
524
|
private contextAnalyzer;
|
|
525
|
+
private contextScoreThreshold;
|
|
470
526
|
private matchingAlgorithm;
|
|
471
527
|
private resultCache;
|
|
528
|
+
private cacheMaxSize;
|
|
529
|
+
private leetTokensByFirstChar;
|
|
472
530
|
/**
|
|
473
531
|
* Creates a new AllProfanity instance with the specified configuration.
|
|
474
532
|
*
|
|
@@ -514,17 +572,56 @@ export declare class AllProfanity {
|
|
|
514
572
|
*/
|
|
515
573
|
private initializeAdvancedAlgorithms;
|
|
516
574
|
/**
|
|
517
|
-
* Normalize leet speak to regular characters
|
|
518
|
-
*
|
|
519
|
-
*
|
|
575
|
+
* Normalize leet speak to regular characters, keeping a map from each
|
|
576
|
+
* normalized character back to its source range in the input text.
|
|
577
|
+
*
|
|
578
|
+
* For normalized index i, starts[i]/ends[i] give the [start, end) range in
|
|
579
|
+
* the input that produced that character. A match [s, e) in the normalized
|
|
580
|
+
* string therefore spans [starts[s], ends[e - 1]) in the input. This is what
|
|
581
|
+
* keeps positions correct when length-changing mappings like "ph" -> "f"
|
|
582
|
+
* apply.
|
|
520
583
|
*/
|
|
521
|
-
private
|
|
584
|
+
private normalizeLeetSpeakWithMap;
|
|
522
585
|
/**
|
|
523
|
-
*
|
|
524
|
-
*
|
|
525
|
-
*
|
|
586
|
+
* Fold unicode evasion tactics into ASCII with a position map: fullwidth
|
|
587
|
+
* forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
|
|
588
|
+
* characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
|
|
589
|
+
* etc.) pass through untouched. Returns null when nothing changed.
|
|
526
590
|
*/
|
|
527
|
-
private
|
|
591
|
+
private unicodeNormalizeWithMap;
|
|
592
|
+
/**
|
|
593
|
+
* Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
|
|
594
|
+
* position map. Only triggers when a run of 3+ identical characters
|
|
595
|
+
* exists, so ordinary doubled letters never pay for this pass.
|
|
596
|
+
* Returns null when not triggered.
|
|
597
|
+
*/
|
|
598
|
+
private collapseRepeatsWithMap;
|
|
599
|
+
/**
|
|
600
|
+
* Build the list of (text, position-map) variants to scan: the base text
|
|
601
|
+
* plus unicode-folded, leet-normalized and repeat-collapsed variants, each
|
|
602
|
+
* included only when its normalization actually changed something.
|
|
603
|
+
*/
|
|
604
|
+
private buildScanPasses;
|
|
605
|
+
/**
|
|
606
|
+
* Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
|
|
607
|
+
* Each mask matches exactly one character and the token's visible letters
|
|
608
|
+
* must align with a dictionary word, so "c#" or "5% off" never flag.
|
|
609
|
+
*/
|
|
610
|
+
private findMaskedMatches;
|
|
611
|
+
/**
|
|
612
|
+
* Find words spelled out with a uniform single separator ("f u c k",
|
|
613
|
+
* "f.u.c.k"). The joined letters must equal a dictionary word exactly:
|
|
614
|
+
* runs like "U S A" or letters inside spelled-out sentences never flag.
|
|
615
|
+
*/
|
|
616
|
+
private findSeparatedMatches;
|
|
617
|
+
/**
|
|
618
|
+
* Find unambiguous profanity stems embedded inside larger tokens
|
|
619
|
+
* ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
|
|
620
|
+
* are currently in the dictionary are considered, and tokens listed in
|
|
621
|
+
* EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
|
|
622
|
+
* token is reported so cleaning masks all of it.
|
|
623
|
+
*/
|
|
624
|
+
private findEmbeddedMatches;
|
|
528
625
|
/**
|
|
529
626
|
* Check if a match is bounded by word boundaries (strict mode).
|
|
530
627
|
* @param text - The text.
|
|
@@ -548,6 +645,11 @@ export declare class AllProfanity {
|
|
|
548
645
|
* @returns True if whitelisted, false otherwise.
|
|
549
646
|
*/
|
|
550
647
|
private isWhitelistedMatch;
|
|
648
|
+
/**
|
|
649
|
+
* In partial-word mode, check whether the word CONTAINING the match is
|
|
650
|
+
* whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
|
|
651
|
+
*/
|
|
652
|
+
private isWhitelistedContainingWord;
|
|
551
653
|
/**
|
|
552
654
|
* Remove overlapping matches, keeping only the longest at each start position.
|
|
553
655
|
* @param matches - Array of match results.
|
|
@@ -559,13 +661,24 @@ export declare class AllProfanity {
|
|
|
559
661
|
*/
|
|
560
662
|
private findMatchesWithAhoCorasick;
|
|
561
663
|
/**
|
|
562
|
-
*
|
|
664
|
+
* Check whether the Bloom Filter can quickly rule out any profanity in the
|
|
665
|
+
* text. Only safe for ASCII whole-word matching: partial matches and
|
|
666
|
+
* non-ASCII scripts can match inside tokens, so they bypass the prefilter.
|
|
667
|
+
*/
|
|
668
|
+
private bloomQuickReject;
|
|
669
|
+
/**
|
|
670
|
+
* Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
|
|
563
671
|
*/
|
|
564
672
|
private findMatchesHybrid;
|
|
565
673
|
/**
|
|
566
674
|
* Apply context analysis to filter false positives
|
|
567
675
|
*/
|
|
568
676
|
private applyContextAnalysis;
|
|
677
|
+
/**
|
|
678
|
+
* Drop all cached detection results. Must be called whenever the word lists
|
|
679
|
+
* or any option that affects detection output changes.
|
|
680
|
+
*/
|
|
681
|
+
private invalidateCache;
|
|
569
682
|
/**
|
|
570
683
|
* Detects profanity in the provided text and returns comprehensive analysis.
|
|
571
684
|
*
|
|
@@ -661,6 +774,11 @@ export declare class AllProfanity {
|
|
|
661
774
|
* @see {@link detect} for detailed profanity analysis
|
|
662
775
|
*/
|
|
663
776
|
check(text: string): boolean;
|
|
777
|
+
/**
|
|
778
|
+
* Trie scan that stops at the first match surviving the whole-word,
|
|
779
|
+
* whitelist and boundary checks. Powers the fast path in check().
|
|
780
|
+
*/
|
|
781
|
+
private hasMatchInPass;
|
|
664
782
|
/**
|
|
665
783
|
* Cleans text by replacing profanity with a placeholder character.
|
|
666
784
|
*
|
|
@@ -1026,9 +1144,10 @@ export declare class AllProfanity {
|
|
|
1026
1144
|
*/
|
|
1027
1145
|
getConfig(): Partial<AllProfanityOptions>;
|
|
1028
1146
|
/**
|
|
1029
|
-
* Rebuild
|
|
1147
|
+
* Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
|
|
1148
|
+
* Filter) from loaded dictionaries and dynamic words.
|
|
1030
1149
|
*/
|
|
1031
|
-
private
|
|
1150
|
+
private rebuildIndexes;
|
|
1032
1151
|
/**
|
|
1033
1152
|
* Update configuration options for the profanity filter.
|
|
1034
1153
|
* @param options - Partial configuration object.
|
|
@@ -1043,6 +1162,7 @@ export declare class AllProfanity {
|
|
|
1043
1162
|
}
|
|
1044
1163
|
/**
|
|
1045
1164
|
* Singleton instance of AllProfanity with default configuration.
|
|
1165
|
+
* Silent so that importing the library never writes to the console.
|
|
1046
1166
|
*/
|
|
1047
1167
|
declare const allProfanity: AllProfanity;
|
|
1048
1168
|
export default allProfanity;
|