badwords-py 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. badwords_py-2.1.0/LICENSE +9 -0
  2. badwords_py-2.1.0/PKG-INFO +201 -0
  3. badwords_py-2.1.0/README.md +177 -0
  4. badwords_py-2.1.0/badwords/__init__.py +5 -0
  5. badwords_py-2.1.0/badwords/check.py +148 -0
  6. badwords_py-2.1.0/badwords/exceptions.py +16 -0
  7. badwords_py-2.1.0/badwords/resource/br.bdw +119 -0
  8. badwords_py-2.1.0/badwords/resource/character_frequency.json +12 -0
  9. badwords_py-2.1.0/badwords/resource/cz.bdw +51 -0
  10. badwords_py-2.1.0/badwords/resource/da.bdw +60 -0
  11. badwords_py-2.1.0/badwords/resource/de.bdw +62 -0
  12. badwords_py-2.1.0/badwords/resource/du.bdw +39 -0
  13. badwords_py-2.1.0/badwords/resource/en.bdw +66 -0
  14. badwords_py-2.1.0/badwords/resource/fi.bdw +57 -0
  15. badwords_py-2.1.0/badwords/resource/fr.bdw +112 -0
  16. badwords_py-2.1.0/badwords/resource/gr.bdw +470 -0
  17. badwords_py-2.1.0/badwords/resource/homoglyphs.json +28 -0
  18. badwords_py-2.1.0/badwords/resource/hu.bdw +79 -0
  19. badwords_py-2.1.0/badwords/resource/in.bdw +119 -0
  20. badwords_py-2.1.0/badwords/resource/it.bdw +153 -0
  21. badwords_py-2.1.0/badwords/resource/ja.bdw +24 -0
  22. badwords_py-2.1.0/badwords/resource/ko.bdw +175 -0
  23. badwords_py-2.1.0/badwords/resource/lt.bdw +153 -0
  24. badwords_py-2.1.0/badwords/resource/no.bdw +62 -0
  25. badwords_py-2.1.0/badwords/resource/pl.bdw +6834 -0
  26. badwords_py-2.1.0/badwords/resource/po.bdw +83 -0
  27. badwords_py-2.1.0/badwords/resource/ro.bdw +8 -0
  28. badwords_py-2.1.0/badwords/resource/ru.bdw +3693 -0
  29. badwords_py-2.1.0/badwords/resource/sp.bdw +395 -0
  30. badwords_py-2.1.0/badwords/resource/sw.bdw +16 -0
  31. badwords_py-2.1.0/badwords/resource/th.bdw +13 -0
  32. badwords_py-2.1.0/badwords/resource/transliteration.json +9 -0
  33. badwords_py-2.1.0/badwords/resource/tu.bdw +780 -0
  34. badwords_py-2.1.0/badwords/resource/ua.bdw +1965 -0
  35. badwords_py-2.1.0/badwords/resource/unicode_mappings.json +114 -0
  36. badwords_py-2.1.0/badwords/text_processor.py +207 -0
  37. badwords_py-2.1.0/badwords_py.egg-info/PKG-INFO +201 -0
  38. badwords_py-2.1.0/badwords_py.egg-info/SOURCES.txt +41 -0
  39. badwords_py-2.1.0/badwords_py.egg-info/dependency_links.txt +1 -0
  40. badwords_py-2.1.0/badwords_py.egg-info/not-zip-safe +1 -0
  41. badwords_py-2.1.0/badwords_py.egg-info/top_level.txt +1 -0
  42. badwords_py-2.1.0/pyproject.toml +48 -0
  43. badwords_py-2.1.0/setup.cfg +4 -0
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 FlacSy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so.
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: badwords-py
3
+ Version: 2.1.0
4
+ Summary: This is a library for effective moderation of content.
5
+ Author-email: iamlostshe <vanamelcikov7275@gmail.com>, FlacSy <flacsy.x@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/FlacSy/badwords
8
+ Project-URL: Repository, https://github.com/FlacSy/badwords.git
9
+ Project-URL: Issues, https://github.com/FlacSy/badwords/issues
10
+ Keywords: moderation,content filtering,obscenity detection,mood analysis,image moderation
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Development Status :: 5 - Production/Stable
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: license-file
24
+
25
+ <div align="center">
26
+
27
+ # 🚫 BadWords
28
+
29
+ **High-performance profanity filter for Python with multilingual support and evasion detection.**
30
+
31
+ [![Python Version](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue?style=flat-square)](https://www.python.org/)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
33
+ [![Build Status](https://img.shields.io/badge/build-passing-brightgreen?style=flat-square)](#)
34
+ [![Downloads](https://img.shields.io/pypi/dm/badwords-py?style=flat-square&color=orange)](https://pypi.org/project/badwords-py/)
35
+
36
+ [Installation](#-installation) • [Quick Start](#-quick-start) • [Supported Languages](#-supported-languages) • [Advanced Evasion Detection](#-advanced-evasion-detection)
37
+
38
+ </div>
39
+
40
+ ---
41
+
42
+ ## 📖 Description
43
+
44
+ `BadWords` is a sophisticated profanity filtering library designed to clean up user-generated content. Unlike simple keyword matching, it uses **similarity scoring**, **homoglyph detection**, and **transliteration** to catch even the most cleverly disguised insults.
45
+
46
+ ## 📦 Installation
47
+
48
+ ### Requirements
49
+ - **Recommended:** Python 3.13
50
+ - **Minimum:** Python 3.10+
51
+
52
+ ### Install via GitHub
53
+ ```bash
54
+ pip install git+https://github.com/FlacSy/badwords.git
55
+
56
+ ```
57
+
58
+ ### Install via PyPI
59
+ ```bash
60
+ pip install badwords-py
61
+ ```
62
+
63
+ ---
64
+
65
+ ## ⚡ Quick Start
66
+
67
+ ### Basic Initialization
68
+
69
+ ```python
70
+ from badwords import ProfanityFilter
71
+
72
+ # Initialize filter
73
+ p = ProfanityFilter()
74
+
75
+ # Load specific languages (e.g., English and Russian)
76
+ p.init(languages=["en", "ru"])
77
+
78
+ # Or load ALL 26+ supported languages
79
+ p.init()
80
+
81
+ ```
82
+
83
+ ### Checking and Filtering Text
84
+
85
+ ```python
86
+ text = "Some very b4d text here"
87
+
88
+ # 1. Simple check (Returns Boolean)
89
+ is_bad = p.filter_text(text)
90
+ print(is_bad) # True
91
+
92
+ # 2. Censoring text (Returns String)
93
+ clean_text = p.filter_text(text, replace_character="*")
94
+ print(clean_text) # "Some very *** text here"
95
+
96
+ ```
97
+
98
+ ---
99
+
100
+ ## 🛠 Methods & API
101
+
102
+ ### `filter_text(text, match_threshold=None, replace_character=None)`
103
+
104
+ The core method of the library.
105
+
106
+ | Parameter | Type | Default | Description |
107
+ | --- | --- | --- | --- |
108
+ | `text` | `str` | Required | Input text to check. |
109
+ | `match_threshold` | `float/None` | `None` | Similarity threshold (1.0 = exact match, 0.7 = aggressive). `None` means exact matching only. |
110
+ | `replace_character` | `str/None` | `None` | If provided, returns censored string. If None, returns bool. |
111
+
112
+ > [!WARNING]
113
+ > **Performance Tip:** Using `match_threshold < 1.0` enables fuzzy matching which is slower. Use `1.0` for high-traffic real-time filtering, or `0.95` for a good balance.
114
+
115
+ ---
116
+
117
+ ## 🧩 Advanced Evasion Detection
118
+
119
+ Standard filters are easy to bypass. `BadWords` is built to detect:
120
+
121
+ * **Homoglyphs:** Detects `hеllo` (using Cyrillic 'е') or `h4llo` (numbers).
122
+ * **Transliteration:** Automatically handles mapping between Cyrillic and Latin alphabets.
123
+ * **Normalization:** Strips diacritics, special characters, and decorative Unicode symbols.
124
+ * **Similarity Analysis:** Uses fuzzy matching to find words with deliberate typos.
125
+
126
+ ### Examples of detected evasions:
127
+
128
+ ```python
129
+ _filter.filter_text("hеllо") # Mixed alphabets (Cyrillic + Latin) -> DETECTED
130
+ _filter.filter_text("h3ll0") # Character substitution -> DETECTED
131
+ _filter.filter_text("h⍺llo") # Mathematical/Greek symbols -> DETECTED
132
+ _filter.filter_text("привет") # Transliterated matches -> DETECTED
133
+
134
+ ```
135
+
136
+ ---
137
+
138
+ ## 🌍 Supported Languages
139
+
140
+ `BadWords` currently supports **26 languages** out of the box:
141
+
142
+ | Code | Language | Code | Language | Code | Language |
143
+ | --- | --- | --- | --- | --- | --- |
144
+ | `en` | English | `ru` | Russian | `ua` | Ukrainian |
145
+ | `de` | German | `fr` | French | `it` | Italian |
146
+ | `sp` | Spanish | `pl` | Polish | `cz` | Czech |
147
+ | `ja` | Japanese | `ko` | Korean | `th` | Thai |
148
+ | ... | & 14 more | | | | |
149
+
150
+ *Use `p.get_all_languages()` to see the full list in your code.*
151
+
152
+ ---
153
+
154
+ ## 🚀 Full Integration Example
155
+
156
+ ```python
157
+ from badwords import ProfanityFilter
158
+
159
+ def monitor_chat():
160
+ # Setup for a global chat
161
+ profanity_filter = ProfanityFilter()
162
+ profanity_filter.init(["en", "ru", "de"])
163
+
164
+ # Custom project-specific banned words
165
+ profanity_filter.add_words(["spam_link_v1", "scam_bot_99"])
166
+
167
+ user_input = "Hey! Check out this b.a.d.w.o.r.d"
168
+
169
+ # Moderate with high accuracy
170
+ is_offensive = profanity_filter.filter_text(user_input, match_threshold=0.95)
171
+
172
+ if is_offensive:
173
+ print("Message blocked: Contains restricted language.")
174
+ else:
175
+ # Proceed with processing
176
+ pass
177
+
178
+ if __name__ == "__main__":
179
+ monitor_chat()
180
+
181
+ ```
182
+
183
+ ---
184
+
185
+ ## 🤝 Contributing
186
+
187
+ Contributions are what make the open-source community an amazing place to learn, inspire, and create.
188
+
189
+ 1. Fork the Project
190
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
191
+ 3. Commit your Changes (`git commit -m 'Add AmazingFeature'`)
192
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
193
+ 5. Open a Pull Request
194
+
195
+ ## 📄 License
196
+
197
+ Distributed under the MIT License. See `LICENSE` for more information.
198
+
199
+ <div align="center">
200
+ <sub>Developed with ❤️ by <a href="https://github.com/FlacSy">FlacSy</a></sub>
201
+ </div>
@@ -0,0 +1,177 @@
1
+ <div align="center">
2
+
3
+ # 🚫 BadWords
4
+
5
+ **High-performance profanity filter for Python with multilingual support and evasion detection.**
6
+
7
+ [![Python Version](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue?style=flat-square)](https://www.python.org/)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
9
+ [![Build Status](https://img.shields.io/badge/build-passing-brightgreen?style=flat-square)](#)
10
+ [![Downloads](https://img.shields.io/pypi/dm/badwords-py?style=flat-square&color=orange)](https://pypi.org/project/badwords-py/)
11
+
12
+ [Installation](#-installation) • [Quick Start](#-quick-start) • [Supported Languages](#-supported-languages) • [Advanced Evasion Detection](#-advanced-evasion-detection)
13
+
14
+ </div>
15
+
16
+ ---
17
+
18
+ ## 📖 Description
19
+
20
+ `BadWords` is a sophisticated profanity filtering library designed to clean up user-generated content. Unlike simple keyword matching, it uses **similarity scoring**, **homoglyph detection**, and **transliteration** to catch even the most cleverly disguised insults.
21
+
22
+ ## 📦 Installation
23
+
24
+ ### Requirements
25
+ - **Recommended:** Python 3.13
26
+ - **Minimum:** Python 3.10+
27
+
28
+ ### Install via GitHub
29
+ ```bash
30
+ pip install git+https://github.com/FlacSy/badwords.git
31
+
32
+ ```
33
+
34
+ ### Install via PyPI
35
+ ```bash
36
+ pip install badwords-py
37
+ ```
38
+
39
+ ---
40
+
41
+ ## ⚡ Quick Start
42
+
43
+ ### Basic Initialization
44
+
45
+ ```python
46
+ from badwords import ProfanityFilter
47
+
48
+ # Initialize filter
49
+ p = ProfanityFilter()
50
+
51
+ # Load specific languages (e.g., English and Russian)
52
+ p.init(languages=["en", "ru"])
53
+
54
+ # Or load ALL 26+ supported languages
55
+ p.init()
56
+
57
+ ```
58
+
59
+ ### Checking and Filtering Text
60
+
61
+ ```python
62
+ text = "Some very b4d text here"
63
+
64
+ # 1. Simple check (Returns Boolean)
65
+ is_bad = p.filter_text(text)
66
+ print(is_bad) # True
67
+
68
+ # 2. Censoring text (Returns String)
69
+ clean_text = p.filter_text(text, replace_character="*")
70
+ print(clean_text) # "Some very *** text here"
71
+
72
+ ```
73
+
74
+ ---
75
+
76
+ ## 🛠 Methods & API
77
+
78
+ ### `filter_text(text, match_threshold=None, replace_character=None)`
79
+
80
+ The core method of the library.
81
+
82
+ | Parameter | Type | Default | Description |
83
+ | --- | --- | --- | --- |
84
+ | `text` | `str` | Required | Input text to check. |
85
+ | `match_threshold` | `float/None` | `None` | Similarity threshold (1.0 = exact match, 0.7 = aggressive). `None` means exact matching only. |
86
+ | `replace_character` | `str/None` | `None` | If provided, returns censored string. If None, returns bool. |
87
+
88
+ > [!WARNING]
89
+ > **Performance Tip:** Using `match_threshold < 1.0` enables fuzzy matching which is slower. Use `1.0` for high-traffic real-time filtering, or `0.95` for a good balance.
90
+
91
+ ---
92
+
93
+ ## 🧩 Advanced Evasion Detection
94
+
95
+ Standard filters are easy to bypass. `BadWords` is built to detect:
96
+
97
+ * **Homoglyphs:** Detects `hеllo` (using Cyrillic 'е') or `h4llo` (numbers).
98
+ * **Transliteration:** Automatically handles mapping between Cyrillic and Latin alphabets.
99
+ * **Normalization:** Strips diacritics, special characters, and decorative Unicode symbols.
100
+ * **Similarity Analysis:** Uses fuzzy matching to find words with deliberate typos.
101
+
102
+ ### Examples of detected evasions:
103
+
104
+ ```python
105
+ _filter.filter_text("hеllо") # Mixed alphabets (Cyrillic + Latin) -> DETECTED
106
+ _filter.filter_text("h3ll0") # Character substitution -> DETECTED
107
+ _filter.filter_text("h⍺llo") # Mathematical/Greek symbols -> DETECTED
108
+ _filter.filter_text("привет") # Transliterated matches -> DETECTED
109
+
110
+ ```
111
+
112
+ ---
113
+
114
+ ## 🌍 Supported Languages
115
+
116
+ `BadWords` currently supports **26 languages** out of the box:
117
+
118
+ | Code | Language | Code | Language | Code | Language |
119
+ | --- | --- | --- | --- | --- | --- |
120
+ | `en` | English | `ru` | Russian | `ua` | Ukrainian |
121
+ | `de` | German | `fr` | French | `it` | Italian |
122
+ | `sp` | Spanish | `pl` | Polish | `cz` | Czech |
123
+ | `ja` | Japanese | `ko` | Korean | `th` | Thai |
124
+ | ... | & 14 more | | | | |
125
+
126
+ *Use `p.get_all_languages()` to see the full list in your code.*
127
+
128
+ ---
129
+
130
+ ## 🚀 Full Integration Example
131
+
132
+ ```python
133
+ from badwords import ProfanityFilter
134
+
135
+ def monitor_chat():
136
+ # Setup for a global chat
137
+ profanity_filter = ProfanityFilter()
138
+ profanity_filter.init(["en", "ru", "de"])
139
+
140
+ # Custom project-specific banned words
141
+ profanity_filter.add_words(["spam_link_v1", "scam_bot_99"])
142
+
143
+ user_input = "Hey! Check out this b.a.d.w.o.r.d"
144
+
145
+ # Moderate with high accuracy
146
+ is_offensive = profanity_filter.filter_text(user_input, match_threshold=0.95)
147
+
148
+ if is_offensive:
149
+ print("Message blocked: Contains restricted language.")
150
+ else:
151
+ # Proceed with processing
152
+ pass
153
+
154
+ if __name__ == "__main__":
155
+ monitor_chat()
156
+
157
+ ```
158
+
159
+ ---
160
+
161
+ ## 🤝 Contributing
162
+
163
+ Contributions are what make the open-source community an amazing place to learn, inspire, and create.
164
+
165
+ 1. Fork the Project
166
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
167
+ 3. Commit your Changes (`git commit -m 'Add AmazingFeature'`)
168
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
169
+ 5. Open a Pull Request
170
+
171
+ ## 📄 License
172
+
173
+ Distributed under the MIT License. See `LICENSE` for more information.
174
+
175
+ <div align="center">
176
+ <sub>Developed with ❤️ by <a href="https://github.com/FlacSy">FlacSy</a></sub>
177
+ </div>
@@ -0,0 +1,5 @@
1
+ """A library for effective moderation of content."""
2
+
3
+ from .check import ProfanityFilter
4
+
5
+ __all__ = ["ProfanityFilter"]
@@ -0,0 +1,148 @@
1
+ """Module for checking text for badwords."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from difflib import SequenceMatcher
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ from .exceptions import NotSupportedLanguage
10
+ from .text_processor import TextProcessor
11
+
12
+ if TYPE_CHECKING:
13
+ from typing import Self
14
+ else:
15
+ Self = "NotSupportedLanguage"
16
+
17
+
18
class ProfanityFilter:
    """Detect and censor profanity using bundled and custom word lists.

    An instance carries no state until :meth:`init` is called. That call sets
    ``resource_dir``, ``text_processor``, ``language_files`` and ``bad_words``,
    which every other method reads.
    """

    def init(
        self: Self,
        languages: list[str] | None = None,
        processing_normalize_text: bool = True,
        processing_aggressive_normalize: bool = True,
        processing_transliterate: bool = True,
        processing_replace_homoglyphs: bool = True,
    ) -> None:
        """Initialize the profanity filter.

        :param languages: Language codes to load word lists for. ``None`` or an
            empty list loads every bundled language.
        :param processing_normalize_text: Enable text normalization for all
            loaded words and for the text being filtered.
        :param processing_aggressive_normalize: Enable aggressive text
            normalization for all loaded words and for the text being filtered.
        :param processing_transliterate: Enable transliteration for all loaded
            words and for the text being filtered.
        :param processing_replace_homoglyphs: Enable homoglyph replacement for
            all loaded words and for the text being filtered.
        :raises NotSupportedLanguage: If any requested code has no bundled
            word list.
        """
        self.resource_dir = Path(__file__).parent / "resource"

        self.text_processor = TextProcessor(
            processing_normalize_text=processing_normalize_text,
            processing_aggressive_normalize=processing_aggressive_normalize,
            processing_transliterate=processing_transliterate,
            processing_replace_homoglyphs=processing_replace_homoglyphs,
        )

        self.language_files = self.initialize_language_files()

        if languages:
            if any(code not in self.language_files for code in languages):
                raise NotSupportedLanguage
            # Copy so that later mutation of the caller's list cannot change
            # the filter's state.
            self.language_files = list(languages)

        self.bad_words = self.initialize_bad_words()

    def initialize_language_files(self: Self) -> list[str]:
        """List the language codes that have a bundled word list.

        Only ``*.bdw`` files are considered; the resource directory also holds
        JSON tables, which are not languages.

        :return: Sorted list of language codes (the ``.bdw`` file stems).
        """
        return sorted(path.stem for path in self.resource_dir.glob("*.bdw"))

    def initialize_bad_words(self: Self) -> set[str]:
        """Load and process the word lists of every selected language.

        Loading is best-effort: a language whose file is missing or cannot be
        read is skipped, and the error is printed rather than raised.

        :return: Set of processed profanity words.
        """
        bad_words: set[str] = set()

        for language in self.language_files:
            try:
                # Sanitize the language code to prevent path traversal
                code = language.lower().strip()
                if not code.isalpha():
                    continue

                file_path = self.resource_dir / f"{code}.bdw"
                if not file_path.exists():
                    continue

                with file_path.open(encoding="utf-8") as f:
                    lines = f.read().splitlines()

                # Blank lines are not words; skipping them keeps empty strings
                # out of the set. The list is built in full before updating so
                # a failure part-way through adds nothing for this language.
                processed_words = [
                    self.text_processor.process_text(line)
                    for line in lines
                    if line.strip()
                ]
                bad_words.update(processed_words)
            except Exception as e:
                print(f"Error loading language file for {language}: {e}")
                continue

        return bad_words

    def add_words(self: Self, words: list[str]) -> None:
        """Add custom profanity words to the filter.

        The words go through the same text processing as the bundled lists.

        :param words: List of custom profanity words.
        """
        processed_words = [self.text_processor.process_text(word) for word in words]
        self.bad_words.update(processed_words)

    def similar(self: Self, a: str, b: str) -> float:
        """Compute similarity ratio between two strings.

        :param a: First string.
        :param b: Second string.
        :return: ``difflib.SequenceMatcher`` ratio in the range 0.0 to 1.0.
        """
        return SequenceMatcher(None, a, b).ratio()

    def _is_profane(self: Self, word: str, match_threshold: float) -> bool:
        """Tell whether one processed word matches the loaded word set."""
        if word in self.bad_words:
            return True
        # Fuzzy comparison only runs for thresholds strictly between 0 and 1.
        if 0 < match_threshold < 1:
            return any(
                self.similar(word, bad_word) > match_threshold
                for bad_word in self.bad_words
            )
        return False

    def filter_text(
        self: Self, text: str,
        match_threshold: float | None = None,
        replace_character: str | None = None,
    ) -> bool | str:
        """Check if the given text contains profanity, optionally censoring it.

        :param text: Input text to check.
        :param match_threshold: Threshold for similarity match. ``None`` (or 0)
            means exact matching only; values strictly between 0 and 1 also
            enable fuzzy matching.
        :param replace_character: Character to replace profane words with. If
            None (or empty), return True/False.
        :return: ``False`` when no profanity is found, regardless of
            ``replace_character``. Otherwise ``True``, or the censored text
            when ``replace_character`` is given.
        """
        if not match_threshold:
            match_threshold = 1

        # Process the input text through all transformations
        words = self.text_processor.process_text(text).split()
        found = any(self._is_profane(word, match_threshold) for word in words)

        if not found:
            return False
        if not replace_character:
            return True

        # Local import: the module's import block does not provide ``re``.
        import re

        def mask(match: re.Match[str]) -> str:
            """Mask one whitespace-delimited token if it processes to profanity."""
            token = match.group()
            pieces = self.text_processor.process_text(token).split()
            if any(self._is_profane(piece, match_threshold) for piece in pieces):
                return replace_character * len(token)
            return token

        # Censor original tokens rather than searching for the processed word:
        # processing can change the spelling, so the processed form may not
        # occur in ``text`` at all. Every offending token is masked.
        return re.sub(r"\S+", mask, text)

    def get_all_languages(self: Self) -> list[str]:
        """Get a list of all available languages.

        The resource directory is rescanned, so the result is not narrowed by
        the ``languages`` argument passed to :meth:`init`.

        :return: List of all language codes.
        """
        return self.initialize_language_files()
@@ -0,0 +1,16 @@
1
+ """Exceptions module."""
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ if TYPE_CHECKING:
6
+ from typing import Self
7
+ else:
8
+ Self = "NotSupportedLanguage"
9
+
10
+
11
class NotSupportedLanguage(Exception):
    """Raised when a requested language is not supported.

    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
    ``except Exception`` handlers can catch it.
    """

    def __str__(self: Self) -> str:
        """Return the fixed, human-readable error message."""
        return "This language is not supported"
@@ -0,0 +1,119 @@
1
+ foderíamos
2
+ fodêssemos
3
+ foderíeis
4
+ fodêramos
5
+ fodêsseis
6
+ merdimbuca
7
+ putariinha
8
+ chibundas
9
+ chibundos
10
+ cácété
11
+ foderemos
12
+ fodêreis
13
+ fodíamos
14
+ fudeção
15
+ putariona
16
+ boasudas
17
+ boazudas
18
+ cassetas
19
+ chibunda
20
+ chibundo
21
+ culhões
22
+ cunhões
23
+ foderdes
24
+ fodereis
25
+ foderiam
26
+ foderias
27
+ fodermos
28
+ foderás
29
+ foderão
30
+ fodessem
31
+ fodesses
32
+ fodestes
33
+ fodíeis
34
+ fudecão
35
+ fudeçao
36
+ fudidas?
37
+ fudidos?
38
+ peithola
39
+ putarias
40
+ xibundas
41
+ xibundos
42
+ babacas
43
+ boasuda
44
+ boazuda
45
+ cacetas
46
+ cacetes
47
+ casseta
48
+ cassete
49
+ culhoes
50
+ culhão
51
+ cunhoes
52
+ cunhão
53
+ curalho
54
+ fodamos
55
+ fodasse
56
+ fodemos
57
+ fodendo
58
+ foderam
59
+ foderas
60
+ foderei
61
+ foderem
62
+ foderes
63
+ foderia
64
+ foderum
65
+ foderá
66
+ fodesse
67
+ fodeste
68
+ fodinha
69
+ fudecao
70
+ fudedor
71
+ fudendo
72
+ furonas
73
+ putaria
74
+ putães
75
+ putãos
76
+ putões
77
+ xibunda
78
+ xibundo
79
+ babaca
80
+ bostas
81
+ caceta
82
+ cacete
83
+ culhao
84
+ cunhao
85
+ fodais
86
+ fodeis
87
+ fodera
88
+ fodete
89
+ fodiam
90
+ fodias
91
+ fodão
92
+ furona
93
+ furão
94
+ merdas
95
+ putão
96
+ sefoda
97
+ bosta
98
+ fodam
99
+ fodao
100
+ fodas
101
+ fodei
102
+ fodem
103
+ foder
104
+ fodes
105
+ fodeu
106
+ fodia
107
+ fuder
108
+ fudeu
109
+ furao
110
+ merda
111
+ porra
112
+ putos
113
+ foda
114
+ fode
115
+ fodi
116
+ fodo
117
+ puto
118
+ cus
119
+ cu
@@ -0,0 +1,12 @@
1
+ {
2
+ "a": ["4", "@", "а"],
3
+ "b": ["8", "6", "в"],
4
+ "e": ["3", "ё", "е"],
5
+ "g": ["9", "6"],
6
+ "i": ["1", "!", "і"],
7
+ "l": ["1", "|", "!"],
8
+ "o": ["0", "о"],
9
+ "s": ["5", "$", "ѕ"],
10
+ "t": ["7", "+"],
11
+ "z": ["2", "z"]
12
+ }