PyPI - korcen - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

korcen 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

korcen/korcen.py CHANGED Viewed

@@ -137,7 +137,7 @@ FALSE_POSITIVE_PATTERNS_GENERAL = [
     '줫습니다', '줫음', '줫잖아', '줫겠지', '쫒아', '쫒는', '쫒기다', '쫒기라', '쫒기로',
     '쫒기를', '쫒기며', '쫒기는', '쫒기나', '쫒겨', '쫒겻', '쫒겼', '쫒았', '쫒다', '쫒고',
     '줫는', '줫어', '줬는', '줫군', '줬다', '줬어', '천조', '쫒기', '해줫더니', '줫다', '내쫒은',
-    '내쫒다', '좇아',
+    '내쫒다', '좇아', "날개",
     'ㅡ'
 ]
 FALSE_POSITIVE_PATTERNS_MINOR = [
@@ -162,7 +162,7 @@ FALSE_POSITIVE_PATTERNS_SEXUAL = [
     '빨간색', '초록색', '보라색', '청색', '핑크색', '남색', '검은색', '하양색', '주황색', '연두색',
     '스공', '스시', '스키장', '스킨', '스킬', '스틸', '스탑', '스트레스', '해야', '카시야스', '야스톤', '유니섹스', '스튜디오',
     '위대한', '소유자', '작업자', '자기위로', '위대하지', '암살자', '학자',
-    'freenude',
+    'freenude', "상자"
 ]
 FALSE_POSITIVE_PATTERNS_BELITTLE = [
     '려운지', '무서운지', '라운지', '운지법', '싸운지', '운지버섯', '운지린다', '깔보다', '깔보시',
@@ -356,7 +356,7 @@ def build_flexible_regex(pattern_in_processed_text: str):
     reverse_multi_map = {}
     for k, v in MULTI_CHAR_REPLACEMENTS.items():
-         reverse_multi_map.setdefault(v.lower(), set()).add(k)
+        reverse_multi_map.setdefault(v.lower(), set()).add(k)
     for char in pattern_in_processed_text:
         char_lower = char.lower()
@@ -456,7 +456,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
     if level == 'english' and BETTER_PROFANITY_LOADED:
         fp_regex_english = get_false_positive_regex('english')
         if fp_regex_english:
-             text_for_better_profanity = fp_regex_english.sub('', text_for_better_profanity)
+            text_for_better_profanity = fp_regex_english.sub('', text_for_better_profanity)
         text_for_better_profanity = text_for_better_profanity.replace("*", "")
@@ -467,12 +467,12 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
             censored_words_in_segment = re.findall(r'\b\w*▩+\w*\b', censored_text.lower())
             if censored_words_in_segment:
-                 original_words_split = text_for_better_profanity.split()
-                 censored_words_split = censored_text.split()
-                 for ow, cw in zip(original_words_split, censored_words_split):
-                      if '▩' in cw:
-                           detected_word_approx = ow
-                           break
+                original_words_split = text_for_better_profanity.split()
+                censored_words_split = censored_text.split()
+                for ow, cw in zip(original_words_split, censored_words_split):
+                    if '▩' in cw:
+                        detected_word_approx = ow
+                        break
             if detected_word_approx:
                 normalized_detected_word = normalize_for_custom_comparison(detected_word_approx)
@@ -550,7 +550,7 @@ def highlight_profanity(text: str, id: int = None, level: str = 'general', highl
     if level.lower() == 'all':
         levels = ['general', 'minor', 'sexual', 'belittle', 'race', 'parent',
-                  'special', 'politics']
+                'special', 'politics']
         if BETTER_PROFANITY_LOADED:
             levels.append('english')
@@ -625,7 +625,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
             return include_match.group(0)
     if processed_text in EXACT_MATCH_PROFANITY and level == 'general':
-         pass
+        pass
     fp_regex = get_false_positive_regex(level)
     text_without_false_positives = fp_regex.sub('', processed_text) if fp_regex else processed_text
@@ -641,14 +641,14 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
                 detected_word = None
                 for ow, cw in zip(original_words, censored_words):
                     if '▩' in cw:
-                         detected_word = ow.lower()
-                         break
+                        detected_word = ow.lower()
+                        break
                 if detected_word:
                     normalized_detected_word = normalize_for_custom_comparison(detected_word)
                     for exclude_regex in CUSTOM_EXCLUDE_REGEXES:
-                         if exclude_regex.fullmatch(normalized_detected_word):
-                             return None
+                        if exclude_regex.fullmatch(normalized_detected_word):
+                            return None
                     return detected_word
                 fallback_pattern = "english_profanity_detected"
@@ -678,7 +678,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
         normalized_detected_profanity = normalize_for_custom_comparison(detected_profanity_string)
         for exclude_regex in CUSTOM_EXCLUDE_REGEXES:
             if exclude_regex.fullmatch(normalized_detected_profanity):
-                 return None
+                return None
         return detected_profanity_string
@@ -686,7 +686,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
         normalized_processed_text = normalize_for_custom_comparison(processed_text)
         for exclude_regex in CUSTOM_EXCLUDE_REGEXES:
             if exclude_regex.fullmatch(normalized_processed_text):
-                 return None
+                return None
         return processed_text
     return None

{korcen-1.0.0.dist-info → korcen-1.0.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: korcen
-Version: 1.0.0
+Version: 1.0.1
 Summary: 한국어 비속어 검열
 Home-page: https://github.com/KR-korcen/korcen
 Author: Tanat

{korcen-1.0.0.dist-info → korcen-1.0.1.dist-info}/RECORD RENAMED Viewed

@@ -5,14 +5,14 @@ korcen/chinese.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/english.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/general.py,sha256=mGX9Sw-z9nT0hNYoPOPxD68dIi0nlhRnfqnUxLF3rLI,25595
 korcen/japanese.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-korcen/korcen.py,sha256=1e5SrP-uFXxzgilBOa6x5DOXB4iOt0Pn7QvOTS6T5Gk,64223
+korcen/korcen.py,sha256=7xJSNNGNXro0IsSZMByXC0GQq23NaZDTVjbL405P_kU,64220
 korcen/minor.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/parent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/politics.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/race.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/sexual.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 korcen/special.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-korcen-1.0.0.dist-info/METADATA,sha256=lUyJWDekepPANXIS8HyXDwlrjPIahBrLMqPfPKdVrwE,465
-korcen-1.0.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-korcen-1.0.0.dist-info/top_level.txt,sha256=2LtNKXroHMOGgMokwiUPWwsJt5kmJ7yhkZ4bmtROZ5c,7
-korcen-1.0.0.dist-info/RECORD,,
+korcen-1.0.1.dist-info/METADATA,sha256=-4z5cBCvoxvDu-mUwI89x-1G6eF0XLntFA1hyPEbyMI,465
+korcen-1.0.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+korcen-1.0.1.dist-info/top_level.txt,sha256=2LtNKXroHMOGgMokwiUPWwsJt5kmJ7yhkZ4bmtROZ5c,7
+korcen-1.0.1.dist-info/RECORD,,

{korcen-1.0.0.dist-info → korcen-1.0.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{korcen-1.0.0.dist-info → korcen-1.0.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

korcen 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

korcen 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl