PyPI - re-common - Versions diffs - 10.0.0__py3-none-any.whl → 10.0.1__py3-none-any.whl - Mend

re-common 10.0.0__py3-none-any.whl → 10.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

re_common/v2/baselibrary/utils/author_smi.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import copy
 import re
 import string
@@ -248,6 +249,16 @@ def AuthorRatio(
     if len(l1) == len(l2) and (is_same_or_initials_match(l1, l2) or set(l1) == set(l2)):
         return 1
+    # 在这里针对上面一条算法再增加一条算法，先对list 排序在对他进行上面的对比
+    # 如果长度相等 简写也是单词的首字母 那么两个名字一致 举例:Guo, Qiang @@ Q. Guo
+    sort_l1 = copy.deepcopy(l1)
+    sort_l2 = copy.deepcopy(l2)
+    sort_l1.sort()
+    sort_l2.sort()
+    if len(sort_l1) == len(sort_l2) and (is_same_or_initials_match(sort_l1, sort_l2) or set(sort_l1) == set(sort_l2)):
+        return 0.99
     ##############################################################
     # 以上为情况穷举情况，以下为其他情况的相似率计算
     ##############################################################
@@ -262,7 +273,7 @@ def AuthorRatio(
     len_ratio = len1 / len2 if len1 > len2 else len2 / len1
     # 计算归一化的 Indel 相似度。 对于比率<score_cutoff，返回0。
-    end_ratio = normal_end_ratio = Jaro.normalized_similarity(s1, s2)
+    end_ratio = normal_end_ratio = Jaro.normalized_similarity(s1.lower(), s2.lower())
     # 需要对作者的比率分布进行调研决定哪些是小比率哪些是大比率
     if len_ratio > 1.5 and len_ratio < 3:
@@ -287,7 +298,7 @@ def AuthorRatio(
     # 首字母相同提分
     # if is_contained(extract_initials(s1), extract_initials(s2)):
-    if is_contained_list([i[:1] for i in l1], [i[:1] for i in l2]):
+    if is_contained_list([i[:1].lower() for i in l1], [i[:1].lower() for i in l2]):
         # 应该提分
         end_ratio = end_ratio * 1.05
     else:
@@ -302,7 +313,7 @@ def AuthorRatio(
         end_ratio = end_ratio * 1.1
     if l1[0] != l2[0]:
-        end_ratio = end_ratio * Jaro.normalized_similarity(l1[0], l2[0])
+        end_ratio = end_ratio * Jaro.normalized_similarity(l1[0].lower(), l2[0].lower())
     # 如果字符串本身的相似度高 应该拉上去 否者应该拉下来
     return min(end_ratio, 1) * 0.5 + normal_end_ratio * 0.5

re_common/v2/baselibrary/utils/stringutils.py CHANGED Viewed

@@ -63,6 +63,7 @@ def get_diacritic_variant(char1):
     return base_char1
 def get_alphabetic_ratio(text: str) -> float:
+    # 返回字母型字符所占比例
     if not text:
         return 0

{re_common-10.0.0.dist-info → re_common-10.0.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: re_common
-Version: 10.0.0
+Version: 10.0.1
 Summary: a library about all python projects
 Home-page: https://gitee.com/xujiangios/re-common
 Author: vic

{re_common-10.0.0.dist-info → re_common-10.0.1.dist-info}/RECORD RENAMED Viewed

@@ -173,13 +173,13 @@ re_common/v2/baselibrary/tools/text_matcher.py,sha256=F4WtLO-b7H6V9TIvOntCD9ZXSQ
 re_common/v2/baselibrary/tools/unionfind_tools.py,sha256=VYHZZPXwBYljsm7TjV1B6iCgDn3O3btzNf9hMvQySVU,2965
 re_common/v2/baselibrary/utils/BusinessStringUtil.py,sha256=dxrWO800wElZM_4aKolUHSPBYZlxqzXukE4M-LZ13jA,2644
 re_common/v2/baselibrary/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-re_common/v2/baselibrary/utils/author_smi.py,sha256=_P3I5JXvxHqNNWUwhAyHiJuBFiC0tXvGD8-_HxNiuEU,11051
+re_common/v2/baselibrary/utils/author_smi.py,sha256=wkuoGEBNM28k8D1E83vBxJD5N4xgzr6aAQFMVPJ2tnc,11585
 re_common/v2/baselibrary/utils/basedict.py,sha256=tSV85pARe8ZQDY77_h_heS81EWwcgJW076DcA9WQyjY,1161
 re_common/v2/baselibrary/utils/basehdfs.py,sha256=NVV5Q0OMPlM_zTrs9ZDoPJv29GQv5wi9-AP1us5dBrQ,4651
 re_common/v2/baselibrary/utils/json_cls.py,sha256=dHOkWafG9lbQDoub9cbDwT2fDjMKtblQnjFLeA4hECA,286
 re_common/v2/baselibrary/utils/string_bool.py,sha256=f5qYdKvTufxmfSsxXN41WFLV--vCwDWU2LeQPbDvKZY,178
 re_common/v2/baselibrary/utils/string_clear.py,sha256=LqGvv-UZnsVwiDBN3-PdzDUTfWlAsKsvKlkXqySI0eE,3244
-re_common/v2/baselibrary/utils/stringutils.py,sha256=lhDvRL60S6gjhU4D0nfk2Y-c25IyYdYOD0TMoCx-huE,2658
+re_common/v2/baselibrary/utils/stringutils.py,sha256=quAgCdW_ayQwY4AqnZZkZ4NlcSEcy6f1arOVSeP2vEo,2699
 re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
 re_common/vip/baseencodeid.py,sha256=nERoe89ueFM52bG7xwJdflcZHk6T2RQQKbc5uUZc3RM,3272
@@ -206,8 +206,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
 re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
 re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
 re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
-re_common-10.0.0.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-re_common-10.0.0.dist-info/METADATA,sha256=C8xtx6EWq_g7ScVYYKNZRwq7IuZ_z2esfPwhztPshE0,581
-re_common-10.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-re_common-10.0.0.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
-re_common-10.0.0.dist-info/RECORD,,
+re_common-10.0.1.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+re_common-10.0.1.dist-info/METADATA,sha256=xIF1hPdvDgN_bQ3YpyAG3_tjxGOIVQvNUM5NraOe73o,581
+re_common-10.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+re_common-10.0.1.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
+re_common-10.0.1.dist-info/RECORD,,

{re_common-10.0.0.dist-info → re_common-10.0.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{re_common-10.0.0.dist-info → re_common-10.0.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{re_common-10.0.0.dist-info → re_common-10.0.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

re-common 10.0.0__py3-none-any.whl → 10.0.1__py3-none-any.whl

re-common 10.0.0__py3-none-any.whl → 10.0.1__py3-none-any.whl