re-common 10.0.0__py3-none-any.whl → 10.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import copy
1
2
  import re
2
3
  import string
3
4
 
@@ -248,6 +249,16 @@ def AuthorRatio(
248
249
  if len(l1) == len(l2) and (is_same_or_initials_match(l1, l2) or set(l1) == set(l2)):
249
250
  return 1
250
251
 
252
+ # Additional rule on top of the check above: sort both lists first, then run the same comparison on the sorted copies.
253
+ # If the lengths are equal and each abbreviation is the initial letter of a word, the two names are treated as the same. Example: Guo, Qiang @@ Q. Guo
254
+ sort_l1 = copy.deepcopy(l1)
255
+ sort_l2 = copy.deepcopy(l2)
256
+ sort_l1.sort()
257
+ sort_l2.sort()
258
+ if len(sort_l1) == len(sort_l2) and (is_same_or_initials_match(sort_l1, sort_l2) or set(sort_l1) == set(sort_l2)):
259
+ return 0.99
260
+
261
+
251
262
  ##############################################################
252
263
  # 以上为情况穷举情况,以下为其他情况的相似率计算
253
264
  ##############################################################
@@ -262,7 +273,7 @@ def AuthorRatio(
262
273
  len_ratio = len1 / len2 if len1 > len2 else len2 / len1
263
274
 
264
275
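  # Compute the normalized Jaro similarity (this note previously said "Indel", but the call is Jaro.normalized_similarity). Ratios below score_cutoff would return 0; no score_cutoff is passed here.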
265
- end_ratio = normal_end_ratio = Jaro.normalized_similarity(s1, s2)
276
+ end_ratio = normal_end_ratio = Jaro.normalized_similarity(s1.lower(), s2.lower())
266
277
 
267
278
  # 需要对作者的比率分布进行调研决定哪些是小比率哪些是大比率
268
279
  if len_ratio > 1.5 and len_ratio < 3:
@@ -287,7 +298,7 @@ def AuthorRatio(
287
298
 
288
299
  # 首字母相同提分
289
300
  # if is_contained(extract_initials(s1), extract_initials(s2)):
290
- if is_contained_list([i[:1] for i in l1], [i[:1] for i in l2]):
301
+ if is_contained_list([i[:1].lower() for i in l1], [i[:1].lower() for i in l2]):
291
302
  # 应该提分
292
303
  end_ratio = end_ratio * 1.05
293
304
  else:
@@ -302,7 +313,7 @@ def AuthorRatio(
302
313
  end_ratio = end_ratio * 1.1
303
314
 
304
315
  if l1[0] != l2[0]:
305
- end_ratio = end_ratio * Jaro.normalized_similarity(l1[0], l2[0])
316
+ end_ratio = end_ratio * Jaro.normalized_similarity(l1[0].lower(), l2[0].lower())
306
317
 
307
318
  # If the raw string similarity is high, the score should be pulled up; otherwise it should be pulled down.
308
319
  return min(end_ratio, 1) * 0.5 + normal_end_ratio * 0.5
@@ -63,6 +63,7 @@ def get_diacritic_variant(char1):
63
63
  return base_char1
64
64
 
65
65
  def get_alphabetic_ratio(text: str) -> float:
66
+ # 返回字母型字符所占比例
66
67
  if not text:
67
68
  return 0
68
69
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.0
3
+ Version: 10.0.1
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -173,13 +173,13 @@ re_common/v2/baselibrary/tools/text_matcher.py,sha256=F4WtLO-b7H6V9TIvOntCD9ZXSQ
173
173
  re_common/v2/baselibrary/tools/unionfind_tools.py,sha256=VYHZZPXwBYljsm7TjV1B6iCgDn3O3btzNf9hMvQySVU,2965
174
174
  re_common/v2/baselibrary/utils/BusinessStringUtil.py,sha256=dxrWO800wElZM_4aKolUHSPBYZlxqzXukE4M-LZ13jA,2644
175
175
  re_common/v2/baselibrary/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
- re_common/v2/baselibrary/utils/author_smi.py,sha256=_P3I5JXvxHqNNWUwhAyHiJuBFiC0tXvGD8-_HxNiuEU,11051
176
+ re_common/v2/baselibrary/utils/author_smi.py,sha256=wkuoGEBNM28k8D1E83vBxJD5N4xgzr6aAQFMVPJ2tnc,11585
177
177
  re_common/v2/baselibrary/utils/basedict.py,sha256=tSV85pARe8ZQDY77_h_heS81EWwcgJW076DcA9WQyjY,1161
178
178
  re_common/v2/baselibrary/utils/basehdfs.py,sha256=NVV5Q0OMPlM_zTrs9ZDoPJv29GQv5wi9-AP1us5dBrQ,4651
179
179
  re_common/v2/baselibrary/utils/json_cls.py,sha256=dHOkWafG9lbQDoub9cbDwT2fDjMKtblQnjFLeA4hECA,286
180
180
  re_common/v2/baselibrary/utils/string_bool.py,sha256=f5qYdKvTufxmfSsxXN41WFLV--vCwDWU2LeQPbDvKZY,178
181
181
  re_common/v2/baselibrary/utils/string_clear.py,sha256=LqGvv-UZnsVwiDBN3-PdzDUTfWlAsKsvKlkXqySI0eE,3244
182
- re_common/v2/baselibrary/utils/stringutils.py,sha256=lhDvRL60S6gjhU4D0nfk2Y-c25IyYdYOD0TMoCx-huE,2658
182
+ re_common/v2/baselibrary/utils/stringutils.py,sha256=quAgCdW_ayQwY4AqnZZkZ4NlcSEcy6f1arOVSeP2vEo,2699
183
183
  re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
184
  re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
185
185
  re_common/vip/baseencodeid.py,sha256=nERoe89ueFM52bG7xwJdflcZHk6T2RQQKbc5uUZc3RM,3272
@@ -206,8 +206,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
206
206
  re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
207
207
  re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
208
208
  re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
209
- re_common-10.0.0.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
210
- re_common-10.0.0.dist-info/METADATA,sha256=C8xtx6EWq_g7ScVYYKNZRwq7IuZ_z2esfPwhztPshE0,581
211
- re_common-10.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
212
- re_common-10.0.0.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
213
- re_common-10.0.0.dist-info/RECORD,,
209
+ re_common-10.0.1.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
210
+ re_common-10.0.1.dist-info/METADATA,sha256=xIF1hPdvDgN_bQ3YpyAG3_tjxGOIVQvNUM5NraOe73o,581
211
+ re_common-10.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
212
+ re_common-10.0.1.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
213
+ re_common-10.0.1.dist-info/RECORD,,