badwords-py 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,114 @@
1
+ {
2
+ "canadian_aboriginal": {
3
+ "ᑎ": "n", "ᑌ": "u", "ᙐ": "d", "ᗪ": "d", "ᗣ": "a",
4
+ "ᐃ": "i", "ᐅ": "o", "ᐊ": "a", "ᐁ": "e", "ᐄ": "i",
5
+ "ᐆ": "o", "ᐋ": "a", "ᐍ": "e", "ᐏ": "w", "ᐑ": "wi",
6
+ "ᐓ": "wo", "ᐕ": "wa", "ᐗ": "p", "ᐘ": "t", "ᐚ": "k",
7
+ "ᐜ": "c", "ᐞ": "m", "ᐠ": "n", "ᐢ": "s", "ᐤ": "y",
8
+ "ᐦ": "h", "ᐧ": "w", "ᐨ": "s", "ᐩ": "s", "ᐪ": "s"
9
+ },
10
+ "mathematical": {
11
+ "𝔞": "a", "𝔟": "b", "𝔠": "c", "𝔡": "d", "𝔢": "e",
12
+ "𝔣": "f", "𝔤": "g", "𝔥": "h", "𝔦": "i", "𝔧": "j",
13
+ "𝔨": "k", "𝔩": "l", "𝔪": "m", "𝔫": "n", "𝔬": "o",
14
+ "𝔭": "p", "𝔮": "q", "𝔯": "r", "𝔰": "s", "𝔱": "t",
15
+ "𝔲": "u", "𝔳": "v", "𝔴": "w", "𝔵": "x", "𝔶": "y",
16
+ "𝔷": "z"
17
+ },
18
+ "enclosed": {
19
+ "🅰": "a", "🅱": "b", "🅲": "c", "🅳": "d", "🅴": "e",
20
+ "🅵": "f", "🅶": "g", "🅷": "h", "🅸": "i", "🅹": "j",
21
+ "🅺": "k", "🅻": "l", "🅼": "m", "🅽": "n", "🅾": "o",
22
+ "🅿": "p", "🆀": "q", "🆁": "r", "🆂": "s", "🆃": "t",
23
+ "🆄": "u", "🆅": "v", "🆆": "w", "🆇": "x", "🆈": "y",
24
+ "🆉": "z",
25
+ "🅐": "a", "🅑": "b", "🅒": "c", "🅓": "d", "🅔": "e",
26
+ "🅕": "f", "🅖": "g", "🅗": "h", "🅘": "i", "🅙": "j",
27
+ "🅚": "k", "🅛": "l", "🅜": "m", "🅝": "n", "🅞": "o",
28
+ "🅟": "p", "🅠": "q", "🅡": "r", "🅢": "s", "🅣": "t",
29
+ "🅤": "u", "🅥": "v", "🅦": "w", "🅧": "x", "🅨": "y",
30
+ "🅩": "z"
31
+ },
32
+ "circled": {
33
+ "ⓐ": "a", "ⓑ": "b", "ⓒ": "c", "ⓓ": "d", "ⓔ": "e",
34
+ "ⓕ": "f", "ⓖ": "g", "ⓗ": "h", "ⓘ": "i", "ⓙ": "j",
35
+ "ⓚ": "k", "ⓛ": "l", "ⓜ": "m", "ⓝ": "n", "ⓞ": "o",
36
+ "ⓟ": "p", "ⓠ": "q", "ⓡ": "r", "ⓢ": "s", "ⓣ": "t",
37
+ "ⓤ": "u", "ⓥ": "v", "ⓦ": "w", "ⓧ": "x", "ⓨ": "y",
38
+ "ⓩ": "z"
39
+ },
40
+ "aesthetic": {
41
+ "a": "a", "b": "b", "c": "c", "d": "d", "e": "e",
42
+ "f": "f", "g": "g", "h": "h", "i": "i", "j": "j",
43
+ "k": "k", "l": "l", "m": "m", "n": "n", "o": "o",
44
+ "p": "p", "q": "q", "r": "r", "s": "s", "t": "t",
45
+ "u": "u", "v": "v", "w": "w", "x": "x", "y": "y",
46
+ "z": "z",
47
+ "𝐚": "a", "𝐛": "b", "𝐜": "c", "𝐝": "d", "𝐞": "e",
48
+ "𝐟": "f", "𝐠": "g", "𝐡": "h", "𝐢": "i", "𝐣": "j",
49
+ "𝐤": "k", "𝐥": "l", "𝐦": "m", "𝐧": "n", "𝐨": "o",
50
+ "𝐩": "p", "𝐪": "q", "𝐫": "r", "𝐬": "s", "𝐭": "t",
51
+ "𝐮": "u", "𝐯": "v", "𝐰": "w", "𝐱": "x", "𝐲": "y",
52
+ "𝐳": "z",
53
+ "𝒂": "a", "𝒃": "b", "𝒄": "c", "𝒅": "d", "𝒆": "e",
54
+ "𝒇": "f", "𝒈": "g", "𝒉": "h", "𝒊": "i", "𝒋": "j",
55
+ "𝒌": "k", "𝒍": "l", "𝒎": "m", "𝒏": "n", "𝒐": "o",
56
+ "𝒑": "p", "𝒒": "q", "𝒓": "r", "𝒔": "s", "𝒕": "t",
57
+ "𝒖": "u", "𝒗": "v", "𝒘": "w", "𝒙": "x", "𝒚": "y",
58
+ "𝒛": "z",
59
+ "𝓪": "a", "𝓫": "b", "𝓬": "c", "𝓭": "d", "𝓮": "e",
60
+ "𝓯": "f", "𝓰": "g", "𝓱": "h", "𝓲": "i", "𝓳": "j",
61
+ "𝓴": "k", "𝓵": "l", "𝓶": "m", "𝓷": "n", "𝓸": "o",
62
+ "𝓹": "p", "𝓺": "q", "𝓻": "r", "𝓼": "s", "𝓽": "t",
63
+ "𝓾": "u", "𝓿": "v", "𝔀": "w", "𝔁": "x", "𝔂": "y",
64
+ "𝔃": "z",
65
+ "𝔞": "a", "𝔟": "b", "𝔠": "c", "𝔡": "d", "𝔢": "e",
66
+ "𝔣": "f", "𝔤": "g", "𝔥": "h", "𝔦": "i", "𝔧": "j",
67
+ "𝔨": "k", "𝔩": "l", "𝔪": "m", "𝔫": "n", "𝔬": "o",
68
+ "𝔭": "p", "𝔮": "q", "𝔯": "r", "𝔰": "s", "𝔱": "t",
69
+ "𝔲": "u", "𝔳": "v", "𝔴": "w", "𝔵": "x", "𝔶": "y",
70
+ "𝔷": "z",
71
+ "𝕒": "a", "𝕓": "b", "𝕔": "c", "𝕕": "d", "𝕖": "e",
72
+ "𝕗": "f", "𝕘": "g", "𝕙": "h", "𝕚": "i", "𝕛": "j",
73
+ "𝕜": "k", "𝕝": "l", "𝕞": "m", "𝕟": "n", "𝕠": "o",
74
+ "𝕡": "p", "𝕢": "q", "𝕣": "r", "𝕤": "s", "𝕥": "t",
75
+ "𝕦": "u", "𝕧": "v", "𝕨": "w", "𝕩": "x", "𝕪": "y",
76
+ "𝕫": "z",
77
+ "𝖆": "a", "𝖇": "b", "𝖈": "c", "𝖉": "d", "𝖊": "e",
78
+ "𝖋": "f", "𝖌": "g", "𝖍": "h", "𝖎": "i", "𝖏": "j",
79
+ "𝖐": "k", "𝖑": "l", "𝖒": "m", "𝖓": "n", "𝖔": "o",
80
+ "𝖕": "p", "𝖖": "q", "𝖗": "r", "𝖘": "s", "𝖙": "t",
81
+ "𝖚": "u", "𝖛": "v", "𝖜": "w", "𝖝": "x", "𝖞": "y",
82
+ "𝖟": "z",
83
+ "𝗮": "a", "𝗯": "b", "𝗰": "c", "𝗱": "d", "𝗲": "e",
84
+ "𝗳": "f", "𝗴": "g", "𝗵": "h", "𝗶": "i", "𝗷": "j",
85
+ "𝗸": "k", "𝗹": "l", "𝗺": "m", "𝗻": "n", "𝗼": "o",
86
+ "𝗽": "p", "𝗾": "q", "𝗿": "r", "𝘀": "s", "𝘁": "t",
87
+ "𝘂": "u", "𝘃": "v", "𝘄": "w", "𝘅": "x", "𝘆": "y",
88
+ "𝘇": "z",
89
+ "𝘢": "a", "𝘣": "b", "𝘤": "c", "𝘥": "d", "𝘦": "e",
90
+ "𝘧": "f", "𝘨": "g", "𝘩": "h", "𝘪": "i", "𝘫": "j",
91
+ "𝘬": "k", "𝘭": "l", "𝘮": "m", "𝘯": "n", "𝘰": "o",
92
+ "𝘱": "p", "𝘲": "q", "𝘳": "r", "𝘴": "s", "𝘵": "t",
93
+ "𝘶": "u", "𝘷": "v", "𝘸": "w", "𝘹": "x", "𝘺": "y",
94
+ "𝘻": "z",
95
+ "𝙖": "a", "𝙗": "b", "𝙘": "c", "𝙙": "d", "𝙚": "e",
96
+ "𝙛": "f", "𝙜": "g", "𝙝": "h", "𝙞": "i", "𝙟": "j",
97
+ "𝙠": "k", "𝙡": "l", "𝙢": "m", "𝙣": "n", "𝙤": "o",
98
+ "𝙥": "p", "𝙦": "q", "𝙧": "r", "𝙨": "s", "𝙩": "t",
99
+ "𝙪": "u", "𝙫": "v", "𝙬": "w", "𝙭": "x", "𝙮": "y",
100
+ "𝙯": "z",
101
+ "𝚊": "a", "𝚋": "b", "𝚌": "c", "𝚍": "d", "𝚎": "e",
102
+ "𝚏": "f", "𝚐": "g", "𝚑": "h", "𝚒": "i", "𝚓": "j",
103
+ "𝚔": "k", "𝚕": "l", "𝚖": "m", "𝚗": "n", "𝚘": "o",
104
+ "𝚙": "p", "𝚚": "q", "𝚛": "r", "𝚜": "s", "𝚝": "t",
105
+ "𝚞": "u", "𝚟": "v", "𝚠": "w", "𝚡": "x", "𝚢": "y",
106
+ "𝚣": "z",
107
+ "【": "[", "】": "]", "〖": "[", "〗": "]", "〘": "[", "〙": "]",
108
+ "〚": "[", "〛": "]", "〝": "\"", "〞": "\"", "〟": "\"",
109
+ "〰": "-", "〱": "-", "〲": "-", "〳": "-", "〴": "-",
110
+ "〵": "-", "〶": "-", "〷": "-", "〸": "-", "〹": "-",
111
+ "〺": "-", "〻": "-", "〼": "-", "〽": "-", "〾": "-",
112
+ "〿": "-"
113
+ }
114
+ }
@@ -0,0 +1,207 @@
1
+ """Module for advanced text processing and normalization."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ import unicodedata
8
+ from pathlib import Path
9
+ from typing import Dict, Set
10
+
11
class TextProcessor:
    """Advanced text processing and normalization.

    Normalizes Unicode look-alikes to ASCII, strips diacritics,
    transliterates between Cyrillic and Latin, and replaces homoglyphs
    with their standard equivalents. All mapping tables are loaded from
    JSON files in the package's ``resource`` directory.
    """

    def __init__(
        self,
        processing_normalize_text: bool = True,
        processing_aggressive_normalize: bool = True,
        processing_transliterate: bool = True,
        processing_replace_homoglyphs: bool = True,
    ) -> None:
        """Initialize the text processor and load the enabled resources.

        Args:
            processing_normalize_text: Apply basic normalization in
                :meth:`process_text`.
            processing_aggressive_normalize: Apply aggressive normalization
                in :meth:`process_text`.
            processing_transliterate: Load transliteration tables and apply
                them in :meth:`process_text`.
            processing_replace_homoglyphs: Load homoglyph tables and apply
                them in :meth:`process_text`.
        """
        self.processing_normalize_text = processing_normalize_text
        self.processing_aggressive_normalize = processing_aggressive_normalize
        self.processing_transliterate = processing_transliterate
        self.processing_replace_homoglyphs = processing_replace_homoglyphs

        self.resource_dir = Path(__file__).parent / 'resource'
        self.unicode_mappings = self._load_unicode_mappings()

        # Homoglyph resources are loaded only when the feature is enabled,
        # so replace_homoglyphs() must not be called when it is off.
        if self.processing_replace_homoglyphs:
            self.homoglyphs = self._load_homoglyphs()

        self.character_frequency = self._load_character_frequency()

        if self.processing_transliterate:
            self.cyrillic_to_latin = self._load_transliteration()
            # Reverse mapping; if two Cyrillic keys share a Latin value,
            # the last one in file order wins (same as the original).
            self.latin_to_cyrillic = {v: k for k, v in self.cyrillic_to_latin.items()}

        if self.processing_replace_homoglyphs:
            self._build_homoglyph_map()

        self._build_frequency_map()

    def _load_unicode_mappings(self) -> Dict[str, str]:
        """Load Unicode mappings from JSON, flattening all categories.

        Returns:
            A single character -> replacement mapping merged from every
            category in ``unicode_mappings.json``.
        """
        with open(self.resource_dir / 'unicode_mappings.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
        mappings: Dict[str, str] = {}
        for category in data.values():
            mappings.update(category)
        return mappings

    def _load_homoglyphs(self) -> Dict[str, list[str]]:
        """Load homoglyph mappings (standard char -> list of variants)."""
        with open(self.resource_dir / 'homoglyphs.json', 'r', encoding='utf-8') as f:
            return json.load(f)

    def _load_character_frequency(self) -> Dict[str, list[str]]:
        """Load character-frequency substitution mappings from JSON."""
        with open(self.resource_dir / 'character_frequency.json', 'r', encoding='utf-8') as f:
            return json.load(f)

    def _load_transliteration(self) -> Dict[str, str]:
        """Load the Cyrillic -> Latin transliteration table from JSON."""
        with open(self.resource_dir / 'transliteration.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
        return data['cyrillic_to_latin']

    def _build_homoglyph_map(self) -> None:
        """Build a bidirectional homoglyph map from ``self.homoglyphs``.

        Each standard character maps to the set of its variants, and each
        variant maps back to the set of standard characters it imitates.
        """
        self.homoglyph_map: Dict[str, Set[str]] = {}
        for standard, variants in self.homoglyphs.items():
            self.homoglyph_map[standard] = set(variants)
            for variant in variants:
                if variant not in self.homoglyph_map:
                    self.homoglyph_map[variant] = set()
                self.homoglyph_map[variant].add(standard)

    def _build_frequency_map(self) -> None:
        """Build a bidirectional frequency-based substitution map."""
        self.frequency_map: Dict[str, Set[str]] = {}
        for standard, variants in self.character_frequency.items():
            self.frequency_map[standard] = set(variants)
            for variant in variants:
                if variant not in self.frequency_map:
                    self.frequency_map[variant] = set()
                self.frequency_map[variant].add(standard)

    def normalize_unicode(self, text: str) -> str:
        """Normalize Unicode characters to their basic form.

        Applies NFKC normalization, removes combining marks, lowercases,
        and replaces characters found in ``self.unicode_mappings``.

        Args:
            text: Input text to normalize.

        Returns:
            Normalized text.
        """
        text = unicodedata.normalize('NFKC', text)

        # Drop combining marks (diacritics) left after NFKC.
        text = ''.join(c for c in text if not unicodedata.combining(c))

        text = text.lower()

        result = []
        for char in text:
            # Map decorative/look-alike characters to their ASCII form;
            # unknown characters pass through unchanged.
            result.append(self.unicode_mappings.get(char, char))

        return ''.join(result)

    def normalize_text(self, text: str) -> str:
        """Normalize text and strip punctuation.

        Args:
            text: Input text to normalize.

        Returns:
            Normalized text with non-word, non-space characters removed.
        """
        text = self.normalize_unicode(text)

        # Remove everything that is neither a word character nor whitespace.
        text = re.sub(r'[^\w\s]', '', text)

        return text

    def aggressive_normalize(self, text: str) -> str:
        """Perform aggressive text normalization.

        Keeps only alphanumeric characters and whitespace, then collapses
        all whitespace runs to single spaces.

        Args:
            text: Input text to normalize.

        Returns:
            Aggressively normalized text.
        """
        text = self.normalize_unicode(text)

        text = ''.join(c for c in text if c.isalnum() or c.isspace())

        # Collapse runs of whitespace into single spaces and trim ends.
        text = ' '.join(text.split())

        return text

    def transliterate(self, text: str, to_latin: bool = True) -> str:
        """Transliterate text between Cyrillic and Latin alphabets.

        Args:
            text: Input text to transliterate.
            to_latin: If True, convert to Latin; if False, convert to Cyrillic.

        Returns:
            Transliterated text; unmapped characters pass through unchanged.
        """
        mapping = self.cyrillic_to_latin if to_latin else self.latin_to_cyrillic
        result = []

        for char in text:
            result.append(mapping.get(char, char))

        return ''.join(result)

    def replace_homoglyphs(self, text: str) -> str:
        """Replace homoglyphs with their standard equivalents.

        Args:
            text: Input text to process.

        Returns:
            Text with homoglyphs replaced.
        """
        result = []
        for char in text:
            candidates = self.homoglyph_map.get(char)
            if candidates:
                # min() gives a deterministic choice; the original used
                # next(iter(...)), whose pick varies with set ordering
                # (hash randomization) across interpreter runs.
                result.append(min(candidates))
            else:
                result.append(char)
        return ''.join(result)

    def process_text(self, text: str) -> str:
        """Apply all enabled text processing steps in sequence.

        Args:
            text: Input text to process.

        Returns:
            Fully processed text.
        """
        txt = text
        if self.processing_normalize_text:
            txt = self.normalize_text(txt)

        if self.processing_aggressive_normalize:
            txt = self.aggressive_normalize(txt)

        if self.processing_transliterate:
            # NOTE(review): transliterating to Latin and then immediately
            # back to Cyrillic is preserved from the original. It normalizes
            # mixed-alphabet text toward Cyrillic, but the round trip looks
            # suspicious — confirm the intended final alphabet.
            txt = self.transliterate(txt, to_latin=True)
            txt = self.transliterate(txt, to_latin=False)

        if self.processing_replace_homoglyphs:
            txt = self.replace_homoglyphs(txt)

        return txt
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: badwords-py
3
+ Version: 2.1.0
4
+ Summary: This is a library for effective moderation of content.
5
+ Author-email: iamlostshe <vanamelcikov7275@gmail.com>, FlacSy <flacsy.x@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/FlacSy/badwords
8
+ Project-URL: Repository, https://github.com/FlacSy/badwords.git
9
+ Project-URL: Issues, https://github.com/FlacSy/badwords/issues
10
+ Keywords: moderation,content filtering,obscenity detection,mood analysis,image moderation
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Development Status :: 5 - Production/Stable
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: license-file
24
+
25
+ <div align="center">
26
+
27
+ # 🚫 BadWords
28
+
29
+ **High-performance profanity filter for Python with multilingual support and evasion detection.**
30
+
31
+ [![Python Version](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue?style=flat-square)](https://www.python.org/)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
33
+ [![Build Status](https://img.shields.io/badge/build-passing-brightgreen?style=flat-square)](#)
34
+ [![Downloads](https://img.shields.io/pypi/dm/badwords-py?style=flat-square&color=orange)](https://pypi.org/project/badwords-py/)
35
+
36
+ [Installation](#-installation) • [Quick Start](#-quick-start) • [Supported Languages](#-supported-languages) • [Advanced Evasion Detection](#-advanced-evasion-detection)
37
+
38
+ </div>
39
+
40
+ ---
41
+
42
+ ## 📖 Description
43
+
44
+ `BadWords` is a sophisticated profanity filtering library designed to clean up user-generated content. Unlike simple keyword matching, it uses **similarity scoring**, **homoglyph detection**, and **transliteration** to catch even the most cleverly disguised insults.
45
+
46
+ ## 📦 Installation
47
+
48
+ ### Requirements
49
+ - **Recommended:** Python 3.13
50
+ - **Minimum:** Python 3.10+
51
+
52
+ ### Install via GitHub
53
+ ```bash
54
+ pip install git+https://github.com/FlacSy/badwords.git
55
+
56
+ ```
57
+
58
+ ### Install via PyPI
59
+ ```bash
60
+ pip install badwords-py
61
+ ```
62
+
63
+ ---
64
+
65
+ ## ⚡ Quick Start
66
+
67
+ ### Basic Initialization
68
+
69
+ ```python
70
+ from badwords import ProfanityFilter
71
+
72
+ # Initialize filter
73
+ p = ProfanityFilter()
74
+
75
+ # Load specific languages (e.g., English and Russian)
76
+ p.init(languages=["en", "ru"])
77
+
78
+ # Or load ALL 26+ supported languages
79
+ p.init()
80
+
81
+ ```
82
+
83
+ ### Checking and Filtering Text
84
+
85
+ ```python
86
+ text = "Some very b4d text here"
87
+
88
+ # 1. Simple check (Returns Boolean)
89
+ is_bad = p.filter_text(text)
90
+ print(is_bad) # True
91
+
92
+ # 2. Censoring text (Returns String)
93
+ clean_text = p.filter_text(text, replace_character="*")
94
+ print(clean_text) # "Some very *** text here"
95
+
96
+ ```
97
+
98
+ ---
99
+
100
+ ## 🛠 Methods & API
101
+
102
+ ### `filter_text(text, match_threshold=0.8, replace_character=None)`
103
+
104
+ The core method of the library.
105
+
106
+ | Parameter | Type | Default | Description |
107
+ | --- | --- | --- | --- |
108
+ | `text` | `str` | Required | Input text to check. |
109
+ | `match_threshold` | `float` | `0.8` | Similarity threshold (1.0 = exact match, 0.7 = aggressive). |
110
+ | `replace_character` | `str/None` | `None` | If provided, returns censored string. If None, returns bool. |
111
+
112
+ > [!WARNING]
113
+ > **Performance Tip:** Using `match_threshold < 1.0` enables fuzzy matching which is slower. Use `1.0` for high-traffic real-time filtering, or `0.95` for a good balance.
114
+
115
+ ---
116
+
117
+ ## 🧩 Advanced Evasion Detection
118
+
119
+ Standard filters are easy to bypass. `BadWords` is built to detect:
120
+
121
+ * **Homoglyphs:** Detects `hеllo` (using Cyrillic 'е') or `h4llo` (numbers).
122
+ * **Transliteration:** Automatically handles mapping between Cyrillic and Latin alphabets.
123
+ * **Normalization:** Strips diacritics, special characters, and decorative Unicode symbols.
124
+ * **Similarity Analysis:** Uses fuzzy matching to find words with deliberate typos.
125
+
126
+ ### Examples of detected evasions:
127
+
128
+ ```python
129
+ _filter.filter_text("hеllо") # Mixed alphabets (Cyrillic + Latin) -> DETECTED
130
+ _filter.filter_text("h3ll0") # Character substitution -> DETECTED
131
+ _filter.filter_text("h⍺llo") # Mathematical/Greek symbols -> DETECTED
132
+ _filter.filter_text("привет") # Transliterated matches -> DETECTED
133
+
134
+ ```
135
+
136
+ ---
137
+
138
+ ## 🌍 Supported Languages
139
+
140
+ `BadWords` currently supports **26 languages** out of the box:
141
+
142
+ | Code | Language | Code | Language | Code | Language |
143
+ | --- | --- | --- | --- | --- | --- |
144
+ | `en` | English | `ru` | Russian | `ua` | Ukrainian |
145
+ | `de` | German | `fr` | French | `it` | Italian |
146
+ | `sp` | Spanish | `pl` | Polish | `cz` | Czech |
147
+ | `ja` | Japanese | `ko` | Korean | `th` | Thai |
148
+ | ... | & 14 more | | | | |
149
+
150
+ *Use `p.get_all_languages()` to see the full list in your code.*
151
+
152
+ ---
153
+
154
+ ## 🚀 Full Integration Example
155
+
156
+ ```python
157
+ from badwords import ProfanityFilter
158
+
159
+ def monitor_chat():
160
+ # Setup for a global chat
161
+ profanity_filter = ProfanityFilter()
162
+ profanity_filter.init(["en", "ru", "de"])
163
+
164
+ # Custom project-specific banned words
165
+ profanity_filter.add_words(["spam_link_v1", "scam_bot_99"])
166
+
167
+ user_input = "Hey! Check out this b.a.d.w.o.r.d"
168
+
169
+ # Moderate with high accuracy
170
+ is_offensive = profanity_filter.filter_text(user_input, match_threshold=0.95)
171
+
172
+ if is_offensive:
173
+ print("Message blocked: Contains restricted language.")
174
+ else:
175
+ # Proceed with processing
176
+ pass
177
+
178
+ if __name__ == "__main__":
179
+ monitor_chat()
180
+
181
+ ```
182
+
183
+ ---
184
+
185
+ ## 🤝 Contributing
186
+
187
+ Contributions are what make the open-source community an amazing place to learn, inspire, and create.
188
+
189
+ 1. Fork the Project
190
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
191
+ 3. Commit your Changes (`git commit -m 'Add AmazingFeature'`)
192
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
193
+ 5. Open a Pull Request
194
+
195
+ ## 📄 License
196
+
197
+ Distributed under the MIT License. See `LICENSE` for more information.
198
+
199
+ <div align="center">
200
+ <sub>Developed with ❤️ by <a href="https://github.com/FlacSy">FlacSy</a></sub>
201
+ </div>
@@ -0,0 +1,38 @@
1
+ badwords/__init__.py,sha256=vcOyALowOTBXKB6J_71Qc88mOJ3hr-phZsAGMKQ7mXI,120
2
+ badwords/check.py,sha256=Gbi0EmNKsc6GUBfHyLRavdpp9ZtM0i4GbHbdttRYojM,5575
3
+ badwords/exceptions.py,sha256=D3L-BuQdH5M-pGpk0uW7MQPr9h1ZOPNFzH6NtP4T_Jo,367
4
+ badwords/text_processor.py,sha256=dlFsoW678I9SCffGzNVp0OBG-D-8AkA1wGO-KuDCcsg,7107
5
+ badwords/resource/br.bdw,sha256=Pl9btZq3bO9i_aOjnU58fS7abJka5XQFZEOmE8sya_8,945
6
+ badwords/resource/character_frequency.json,sha256=60zRDkpvGiuttFYvaN_hXlxlNau1MluaMYPVxzGEyps,250
7
+ badwords/resource/cz.bdw,sha256=Tvc1-6BXbFQwEnKXJQvPRhv_aywhdnGN0ic5tn8j8k8,418
8
+ badwords/resource/da.bdw,sha256=eyZI-MtJTgIy-UvmeGTHM4ve50zn5ZVfokNXG_LFl20,550
9
+ badwords/resource/de.bdw,sha256=th4WYmpUrSs7qeOi6hOtJ25rXhWYrdqzJn3fmlGfP7c,683
10
+ badwords/resource/du.bdw,sha256=tPa6X8xDtWIH04cbmdJdozJvnB0KF_KjyrlvLBKMAmc,349
11
+ badwords/resource/en.bdw,sha256=Tv4QyDGlfs8gG2jf3xN4JKhfVLGpiA2Ewy2rYVtoxuU,1169
12
+ badwords/resource/fi.bdw,sha256=xrd-KtKJ2UzOuTjJfXkQGqoYiWur4xjN2KC7rej3aEw,485
13
+ badwords/resource/fr.bdw,sha256=R9Eog8PzzNqmfOqCmisoyq_bHAmxqAhKQ7FeDMGRKDg,1009
14
+ badwords/resource/gr.bdw,sha256=fUXS3-fulVN9iXn9Rk26fPHyK1Bx9lzG4ANOlstWK3c,9130
15
+ badwords/resource/homoglyphs.json,sha256=iNzL-X4Q_IHqyc5a5v2RviZ5v5GuVQM4TldBykVQ5HM,515
16
+ badwords/resource/hu.bdw,sha256=Nwi_yQ8TMa5GSeG3dFGNk5XC9gfxVpvLCnRzJM6tZJQ,618
17
+ badwords/resource/in.bdw,sha256=AkA2apiqw5mjswcsj3fIboJANt9rO6XTgzeN8Kgkj2o,813
18
+ badwords/resource/it.bdw,sha256=WnGXJBsVCDmbqNHSkiVo05GI2QRVEl70IO_8rzoa8OQ,1377
19
+ badwords/resource/ja.bdw,sha256=EfHxZ73C8LWYYiwlCyK9lJV8sFuGoZxC2YYCUnkQ_eE,194
20
+ badwords/resource/ko.bdw,sha256=vkPIdZIufbVqbite_6IiXNLyd8DVIRTYAHL8ce50q6I,1919
21
+ badwords/resource/lt.bdw,sha256=Cfzi7xS7ZO3ZYMDwK0iVkaBJW4Zf44HiN5BbLuLx_O4,1476
22
+ badwords/resource/no.bdw,sha256=noIurraqWgBZfta8HmqJoB0odGD9jkkgt2joHp23B-A,543
23
+ badwords/resource/pl.bdw,sha256=CG8sPO7Z-RXDgrkfQ_FMIJncpEo9RkOGelJFm3ZaOIY,79009
24
+ badwords/resource/po.bdw,sha256=OGtiiU6aMmhyTAm1H8KiMXgQC3Rzwa8O-xgoWe-IOfo,669
25
+ badwords/resource/ro.bdw,sha256=8zQVyfvnvhFhqW7nFo3lqTYUbK1lIs8ZPn4TFTp4eA8,50
26
+ badwords/resource/ru.bdw,sha256=U1X99mV7h9XFm5emLFIQoeq-hNaWACG0ieGpRbASJjM,81318
27
+ badwords/resource/sp.bdw,sha256=kLglEbtkN6Z7x2MFfUqfJt1jtkN1dC_gtlepqC5ocDU,3790
28
+ badwords/resource/sw.bdw,sha256=NOgNNv3DbKbUQkOkU_-UBLzar9tbnBqbzUN1vlDVlpE,120
29
+ badwords/resource/th.bdw,sha256=_hrKv4Rmfqeo8eDFTK-l-lHGN3_GGE9jxS4V5XF19U0,186
30
+ badwords/resource/transliteration.json,sha256=jCKYi-EVbnLSKbgEoFd7mfKwGMV4V9ko2QiLwVdjuXs,445
31
+ badwords/resource/tu.bdw,sha256=9qI5G2chVRHn4sqmw0rqtYJv4KrU4s1PGcBYel0jfq8,8864
32
+ badwords/resource/ua.bdw,sha256=1X2ld6-Dz8ocnRvOuaAy8dWyaQLNLSphZ9YQrOvPwDI,44475
33
+ badwords/resource/unicode_mappings.json,sha256=Q7lLaLUtI_LJaY2aas8C3FpR1V29_KlZgdnLHr5S5iY,6662
34
+ badwords_py-2.1.0.dist-info/licenses/LICENSE,sha256=HbGX33ESqv7YurvAg5UKhkwh-7CC2do77sMZsSj2Z6s,1029
35
+ badwords_py-2.1.0.dist-info/METADATA,sha256=PrZPm1oqMoy4ns3Dp7j1TCmzuuSVG01p81U7HVMoGlY,6318
36
+ badwords_py-2.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
37
+ badwords_py-2.1.0.dist-info/top_level.txt,sha256=SIGWaKBUlVaNjwc85Ypds1-U94feUaPyyo-hsdA9yCk,9
38
+ badwords_py-2.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 FlacSy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so.
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ badwords