korcen-1.0.0-py3-none-any.whl → korcen-1.0.1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
korcen/korcen.py CHANGED
@@ -137,7 +137,7 @@ FALSE_POSITIVE_PATTERNS_GENERAL = [
137
137
  '줫습니다', '줫음', '줫잖아', '줫겠지', '쫒아', '쫒는', '쫒기다', '쫒기라', '쫒기로',
138
138
  '쫒기를', '쫒기며', '쫒기는', '쫒기나', '쫒겨', '쫒겻', '쫒겼', '쫒았', '쫒다', '쫒고',
139
139
  '줫는', '줫어', '줬는', '줫군', '줬다', '줬어', '천조', '쫒기', '해줫더니', '줫다', '내쫒은',
140
- '내쫒다', '좇아',
140
+ '내쫒다', '좇아', "날개",
141
141
  'ㅡ'
142
142
  ]
143
143
  FALSE_POSITIVE_PATTERNS_MINOR = [
@@ -162,7 +162,7 @@ FALSE_POSITIVE_PATTERNS_SEXUAL = [
162
162
  '빨간색', '초록색', '보라색', '청색', '핑크색', '남색', '검은색', '하양색', '주황색', '연두색',
163
163
  '스공', '스시', '스키장', '스킨', '스킬', '스틸', '스탑', '스트레스', '해야', '카시야스', '야스톤', '유니섹스', '스튜디오',
164
164
  '위대한', '소유자', '작업자', '자기위로', '위대하지', '암살자', '학자',
165
- 'freenude',
165
+ 'freenude', "상자"
166
166
  ]
167
167
  FALSE_POSITIVE_PATTERNS_BELITTLE = [
168
168
  '려운지', '무서운지', '라운지', '운지법', '싸운지', '운지버섯', '운지린다', '깔보다', '깔보시',
@@ -356,7 +356,7 @@ def build_flexible_regex(pattern_in_processed_text: str):
356
356
 
357
357
  reverse_multi_map = {}
358
358
  for k, v in MULTI_CHAR_REPLACEMENTS.items():
359
- reverse_multi_map.setdefault(v.lower(), set()).add(k)
359
+ reverse_multi_map.setdefault(v.lower(), set()).add(k)
360
360
 
361
361
  for char in pattern_in_processed_text:
362
362
  char_lower = char.lower()
@@ -456,7 +456,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
456
456
  if level == 'english' and BETTER_PROFANITY_LOADED:
457
457
  fp_regex_english = get_false_positive_regex('english')
458
458
  if fp_regex_english:
459
- text_for_better_profanity = fp_regex_english.sub('', text_for_better_profanity)
459
+ text_for_better_profanity = fp_regex_english.sub('', text_for_better_profanity)
460
460
 
461
461
  text_for_better_profanity = text_for_better_profanity.replace("*", "")
462
462
 
@@ -467,12 +467,12 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
467
467
  censored_words_in_segment = re.findall(r'\b\w*▩+\w*\b', censored_text.lower())
468
468
 
469
469
  if censored_words_in_segment:
470
- original_words_split = text_for_better_profanity.split()
471
- censored_words_split = censored_text.split()
472
- for ow, cw in zip(original_words_split, censored_words_split):
473
- if '▩' in cw:
474
- detected_word_approx = ow
475
- break
470
+ original_words_split = text_for_better_profanity.split()
471
+ censored_words_split = censored_text.split()
472
+ for ow, cw in zip(original_words_split, censored_words_split):
473
+ if '▩' in cw:
474
+ detected_word_approx = ow
475
+ break
476
476
 
477
477
  if detected_word_approx:
478
478
  normalized_detected_word = normalize_for_custom_comparison(detected_word_approx)
@@ -550,7 +550,7 @@ def highlight_profanity(text: str, id: int = None, level: str = 'general', highl
550
550
 
551
551
  if level.lower() == 'all':
552
552
  levels = ['general', 'minor', 'sexual', 'belittle', 'race', 'parent',
553
- 'special', 'politics']
553
+ 'special', 'politics']
554
554
 
555
555
  if BETTER_PROFANITY_LOADED:
556
556
  levels.append('english')
@@ -625,7 +625,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
625
625
  return include_match.group(0)
626
626
 
627
627
  if processed_text in EXACT_MATCH_PROFANITY and level == 'general':
628
- pass
628
+ pass
629
629
 
630
630
  fp_regex = get_false_positive_regex(level)
631
631
  text_without_false_positives = fp_regex.sub('', processed_text) if fp_regex else processed_text
@@ -641,14 +641,14 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
641
641
  detected_word = None
642
642
  for ow, cw in zip(original_words, censored_words):
643
643
  if '▩' in cw:
644
- detected_word = ow.lower()
645
- break
644
+ detected_word = ow.lower()
645
+ break
646
646
 
647
647
  if detected_word:
648
648
  normalized_detected_word = normalize_for_custom_comparison(detected_word)
649
649
  for exclude_regex in CUSTOM_EXCLUDE_REGEXES:
650
- if exclude_regex.fullmatch(normalized_detected_word):
651
- return None
650
+ if exclude_regex.fullmatch(normalized_detected_word):
651
+ return None
652
652
  return detected_word
653
653
 
654
654
  fallback_pattern = "english_profanity_detected"
@@ -678,7 +678,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
678
678
  normalized_detected_profanity = normalize_for_custom_comparison(detected_profanity_string)
679
679
  for exclude_regex in CUSTOM_EXCLUDE_REGEXES:
680
680
  if exclude_regex.fullmatch(normalized_detected_profanity):
681
- return None
681
+ return None
682
682
 
683
683
  return detected_profanity_string
684
684
 
@@ -686,7 +686,7 @@ def check_and_report_profanity_pattern(text: str, level: str = 'general'):
686
686
  normalized_processed_text = normalize_for_custom_comparison(processed_text)
687
687
  for exclude_regex in CUSTOM_EXCLUDE_REGEXES:
688
688
  if exclude_regex.fullmatch(normalized_processed_text):
689
- return None
689
+ return None
690
690
  return processed_text
691
691
 
692
692
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: korcen
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: 한국어 비속어 검열
5
5
  Home-page: https://github.com/KR-korcen/korcen
6
6
  Author: Tanat
@@ -5,14 +5,14 @@ korcen/chinese.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  korcen/english.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  korcen/general.py,sha256=mGX9Sw-z9nT0hNYoPOPxD68dIi0nlhRnfqnUxLF3rLI,25595
7
7
  korcen/japanese.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- korcen/korcen.py,sha256=1e5SrP-uFXxzgilBOa6x5DOXB4iOt0Pn7QvOTS6T5Gk,64223
8
+ korcen/korcen.py,sha256=7xJSNNGNXro0IsSZMByXC0GQq23NaZDTVjbL405P_kU,64220
9
9
  korcen/minor.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  korcen/parent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  korcen/politics.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  korcen/race.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  korcen/sexual.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  korcen/special.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- korcen-1.0.0.dist-info/METADATA,sha256=lUyJWDekepPANXIS8HyXDwlrjPIahBrLMqPfPKdVrwE,465
16
- korcen-1.0.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
17
- korcen-1.0.0.dist-info/top_level.txt,sha256=2LtNKXroHMOGgMokwiUPWwsJt5kmJ7yhkZ4bmtROZ5c,7
18
- korcen-1.0.0.dist-info/RECORD,,
15
+ korcen-1.0.1.dist-info/METADATA,sha256=-4z5cBCvoxvDu-mUwI89x-1G6eF0XLntFA1hyPEbyMI,465
16
+ korcen-1.0.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
17
+ korcen-1.0.1.dist-info/top_level.txt,sha256=2LtNKXroHMOGgMokwiUPWwsJt5kmJ7yhkZ4bmtROZ5c,7
18
+ korcen-1.0.1.dist-info/RECORD,,
File without changes