re-common 10.0.10__py3-none-any.whl → 10.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  import re
2
2
  import regex
3
3
 
4
- from re_common.v2.baselibrary.utils.stringutils import qj2bj, bj2qj, get_diacritic_variant, clean_html
4
+ from re_common.v2.baselibrary.utils.stringutils import qj2bj, bj2qj, get_diacritic_variant, clean_html, \
5
+ remove_spaces_between_chinese_characters
5
6
 
6
7
 
7
8
  class StringClear(object):
@@ -101,6 +102,7 @@ class StringClear(object):
101
102
  return self
102
103
 
103
104
  def remove_html_tag(self):
105
+ # 去除 html 标签
104
106
  import html
105
107
 
106
108
  self.obj_str = html.unescape(self.obj_str)
@@ -109,6 +111,11 @@ class StringClear(object):
109
111
 
110
112
  return self
111
113
 
114
def remove_spaces_in_chinese_characters(self):
    """Strip the whitespace that sits between Chinese characters.

    Passes the current ``self.obj_str`` through
    ``remove_spaces_between_chinese_characters`` and stores the result
    back on the instance.

    Returns:
        This instance, so further cleaning steps can be chained.
    """
    cleaned = remove_spaces_between_chinese_characters(self.obj_str)
    self.obj_str = cleaned
    return self
118
+
112
119
  def get_str(self):
113
120
  return self.obj_str
114
121
 
@@ -122,6 +129,7 @@ def rel_clear(str_obj):
122
129
  .remove_html_tag() # html标签清理
123
130
  .remove_special_chars() # 移除特殊字符,仅保留字母、数字、空格和汉字 \w 已经包括所有 Unicode 字母 下划线 _ 会被保留
124
131
  .collapse_spaces() # 移除多余空格,连续多个空格变一个
132
+ .remove_spaces_in_chinese_characters() # 匹配中文间的空格并替换为空字符串
125
133
  .lower() # 小写
126
134
  .get_str() # 获取str
127
135
  .strip()) # 去掉空格
@@ -143,4 +143,12 @@ def clean_html(html):
143
143
  return parser.get_text()
144
144
 
145
145
 
146
def remove_spaces_between_chinese_characters(text):
    """Remove whitespace that sits directly between two Chinese characters.

    Only two blocks are treated as "Chinese characters": CJK Unified
    Ideographs Extension A (U+3400-U+4DBF) and CJK Unified Ideographs
    (U+4E00-U+9FFF). The later extension blocks are scattered across the
    code space and are deliberately left out; these two ranges are
    considered sufficient.

    The Yijing Hexagram Symbols block (U+4DC0-U+4DFF) lies between those
    two ranges. It holds symbols, not ideographs, so it is excluded and
    whitespace around hexagram symbols is preserved.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every run of whitespace removed when it is flanked on
        both sides by a Chinese character; all other whitespace is kept.
    """
    # Lookbehind/lookahead keep the neighbouring characters out of the match,
    # so only the whitespace run itself is replaced.
    pattern = (
        r'(?<=[\u3400-\u4dbf\u4e00-\u9fff])'
        r'\s+'
        r'(?=[\u3400-\u4dbf\u4e00-\u9fff])'
    )
    return re.sub(pattern, '', text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.10
3
+ Version: 10.0.11
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -182,8 +182,8 @@ re_common/v2/baselibrary/utils/basehdfs.py,sha256=NVV5Q0OMPlM_zTrs9ZDoPJv29GQv5w
182
182
  re_common/v2/baselibrary/utils/json_cls.py,sha256=dHOkWafG9lbQDoub9cbDwT2fDjMKtblQnjFLeA4hECA,286
183
183
  re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
184
184
  re_common/v2/baselibrary/utils/string_bool.py,sha256=EJnkSck4ofcIeJ6nLzAOVtlt6o1WBgvgVwIqJKj5Suc,2993
185
- re_common/v2/baselibrary/utils/string_clear.py,sha256=LDIf-3Czq1sXp-54aifXdXbdGUX7hpFBKqQa5Azj_lo,5861
186
- re_common/v2/baselibrary/utils/stringutils.py,sha256=GLXHAm8IulC_8hWrN2aiFQjsoOpjczvcVozmTJj86-A,3864
185
+ re_common/v2/baselibrary/utils/string_clear.py,sha256=pGxL9PlzQDM06sC0j6U0zYRemvsJ7-OOpfzS5ETCxAs,6258
186
+ re_common/v2/baselibrary/utils/stringutils.py,sha256=watvMwx8gzEj0Swz7e1cFUUQE1UkN81Fw-Hkjs4l8lo,4233
187
187
  re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
188
  re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
189
189
  re_common/vip/baseencodeid.py,sha256=nERoe89ueFM52bG7xwJdflcZHk6T2RQQKbc5uUZc3RM,3272
@@ -210,8 +210,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
210
210
  re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
211
211
  re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
212
212
  re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
213
- re_common-10.0.10.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
214
- re_common-10.0.10.dist-info/METADATA,sha256=mOarqqiMSzMjAcu1sV0OxUGdwfANLray_3ZpjkAPxFg,582
215
- re_common-10.0.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
216
- re_common-10.0.10.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
217
- re_common-10.0.10.dist-info/RECORD,,
213
+ re_common-10.0.11.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
214
+ re_common-10.0.11.dist-info/METADATA,sha256=5g6SC3mrd2cryFaMmajqme2KGUoyoEkoDDwtqGeCYso,582
215
+ re_common-10.0.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
216
+ re_common-10.0.11.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
217
+ re_common-10.0.11.dist-info/RECORD,,