re-common 10.0.8__py3-none-any.whl → 10.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ class DotDict(dict):
2
2
  """
3
3
  让字典成为对象 既可以用字典方式访问 也可以用点访问key
4
4
  """
5
+
5
6
  def __init__(self, *args, **kwargs):
6
7
  super().__init__(*args, **kwargs)
7
8
  # 递归地将嵌套字典转换为 DotDict
@@ -21,4 +22,16 @@ class DotDict(dict):
21
22
  def __setattr__(self, key, value):
22
23
  if isinstance(value, dict): # 如果值是字典,转换为 DotDict
23
24
  value = DotDict(value)
24
- self[key] = value
25
+ self[key] = value
26
+
27
+ def to_dict(self):
28
+ """
29
+ 将 DotDict 实例转换为普通字典
30
+ """
31
+ result = {}
32
+ for key, value in self.items():
33
+ if isinstance(value, DotDict):
34
+ result[key] = value.to_dict()
35
+ else:
36
+ result[key] = value
37
+ return result
@@ -1,7 +1,7 @@
1
1
  from typing import List
2
2
 
3
3
  import jieba
4
- from datasketch import MinHash
4
+ from datasketch import MinHash, minhash
5
5
 
6
6
 
7
7
  def tokenize(text: str, stopwords=None) -> List[str]:
@@ -31,3 +31,13 @@ def create_minhash(words: List[str], num_perm=128) -> MinHash:
31
31
  for word in words:
32
32
  minhash.update(word.encode("utf-8"))
33
33
  return minhash
34
+
35
+
36
+ def get_str_minhash(title):
37
+ from re_common.v2.baselibrary.utils.string_clear import rel_clear
38
+ rel_title = rel_clear(title)
39
+ if not rel_title:
40
+ return ""
41
+ words = tokenize(rel_title)
42
+ minhash = create_minhash(words)
43
+ return minhash
@@ -10,15 +10,35 @@ def contains_chinese_chars(s):
10
10
 
11
11
 
12
12
  def is_empty(value):
13
+ """
14
+ 判断一个值是否为空。
15
+
16
+ 支持的类型:
17
+ - None
18
+ - 空字符串(去除空白后)
19
+ - pandas 的 NaN
20
+ - 其他可迭代类型(如列表、字典等)的长度为 0
21
+ - 其他情况返回 False
22
+ """
13
23
  # 如果是 None,直接返回 True
14
24
  if value is None:
15
25
  return True
16
26
 
27
+ # 尝试处理 pandas 的 NaN
28
+ try:
29
+ import pandas as pd
30
+ if pd.isna(value):
31
+ return True
32
+ except ImportError:
33
+ pass # 如果没有安装 pandas,跳过
34
+
17
35
  # 如果是字符串,检查去除空白后是否为空
18
36
  if isinstance(value, str):
19
37
  return value.strip() == ""
20
38
 
21
- # 可选:处理其他可迭代类型(如列表、字典等)
39
+
40
+
41
+ # 处理其他可迭代类型(如列表、字典等)
22
42
  if hasattr(value, "__len__"):
23
43
  return len(value) == 0
24
44
 
@@ -165,10 +165,11 @@ def ref_clear(str_obj):
165
165
  def clear_obj(str_obj):
166
166
  # 为对象化定制的清理
167
167
  str_obj = clear_au_organ(str_obj)
168
- str_obj = str_obj.replace("ß", "SS")
169
- return (StringClear(str_obj)
170
- .remove_diacritics() # 清理音标
171
- .upper()
172
- .get_str() # 获取str
173
- .strip() # 去掉空格
174
- )
168
+ # str_obj = str_obj.replace("ß", "SS") # "ß" 的 大写就是 "SS"
169
+ result = (StringClear(str_obj)
170
+ .remove_diacritics() # 清理音标
171
+ .upper()
172
+ .get_str() # 获取str
173
+ .strip() # 去掉空格
174
+ )
175
+ return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.8
3
+ Version: 10.0.9
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -168,10 +168,10 @@ re_common/v2/baselibrary/s3object/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
168
168
  re_common/v2/baselibrary/s3object/baseboto3.py,sha256=mXuIFx99pnrPGQ4LJCZwlN1HLbaU-OWLwck0cVzW6hc,11203
169
169
  re_common/v2/baselibrary/tools/WeChatRobot.py,sha256=EaQgNncROAhU5-psYRGWAshIV5aEw-p2u1kYLpvr7RA,2796
170
170
  re_common/v2/baselibrary/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
- re_common/v2/baselibrary/tools/dict_tools.py,sha256=HW-YZOUhv5GMzFsF-ArLfDoszui1K3_M7IiRIe4VEXA,909
171
+ re_common/v2/baselibrary/tools/dict_tools.py,sha256=BTh7oJuJ619IZgxiYlim0ltrXBclDtb7WzyFGr7wVf0,1246
172
172
  re_common/v2/baselibrary/tools/dolphinscheduler.py,sha256=1m7UGYDiuvJUCI6ik6CGM2fO8U5XteJzn55VRbwB9ts,7978
173
173
  re_common/v2/baselibrary/tools/list_tools.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
174
- re_common/v2/baselibrary/tools/search_hash_tools.py,sha256=o-PNjmcYDkfyiR75Jci_9sSn4cGi_F9jPCIrwYdnb1U,1013
174
+ re_common/v2/baselibrary/tools/search_hash_tools.py,sha256=GfZf_zFgEMm6DO0w7d70Fzv1iKzq0WqBTMEfzjEuBAw,1292
175
175
  re_common/v2/baselibrary/tools/text_matcher.py,sha256=F4WtLO-b7H6V9TIvOntCD9ZXSQP_KijPuLLYcLPtrKQ,7021
176
176
  re_common/v2/baselibrary/tools/unionfind_tools.py,sha256=VYHZZPXwBYljsm7TjV1B6iCgDn3O3btzNf9hMvQySVU,2965
177
177
  re_common/v2/baselibrary/utils/BusinessStringUtil.py,sha256=tzjVr_-6iPAKTt14hR-BhRshdRgeT_MPJpUQkxcTXns,4084
@@ -181,8 +181,8 @@ re_common/v2/baselibrary/utils/basedict.py,sha256=tSV85pARe8ZQDY77_h_heS81EWwcgJ
181
181
  re_common/v2/baselibrary/utils/basehdfs.py,sha256=NVV5Q0OMPlM_zTrs9ZDoPJv29GQv5wi9-AP1us5dBrQ,4651
182
182
  re_common/v2/baselibrary/utils/json_cls.py,sha256=dHOkWafG9lbQDoub9cbDwT2fDjMKtblQnjFLeA4hECA,286
183
183
  re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
184
- re_common/v2/baselibrary/utils/string_bool.py,sha256=4VCr1g8pX5YnzZSKctQgQfmhSQ0aw7a8ruhWdiRmBFU,641
185
- re_common/v2/baselibrary/utils/string_clear.py,sha256=g_2s2C4yY0C5AvuANjn02g7e_VM_uNY1lxoQg5HtLrk,5799
184
+ re_common/v2/baselibrary/utils/string_bool.py,sha256=tR9JrZuuBxz7oDgSpndKAeer0BYYFrhSxikUsNkHUeg,1099
185
+ re_common/v2/baselibrary/utils/string_clear.py,sha256=LDIf-3Czq1sXp-54aifXdXbdGUX7hpFBKqQa5Azj_lo,5861
186
186
  re_common/v2/baselibrary/utils/stringutils.py,sha256=GLXHAm8IulC_8hWrN2aiFQjsoOpjczvcVozmTJj86-A,3864
187
187
  re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
188
  re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
@@ -210,8 +210,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
210
210
  re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
211
211
  re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
212
212
  re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
213
- re_common-10.0.8.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
214
- re_common-10.0.8.dist-info/METADATA,sha256=687IQ2myx3vDwQca9JMzKTf8KHCR6qSst65ykG1VZ9Y,581
215
- re_common-10.0.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
216
- re_common-10.0.8.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
217
- re_common-10.0.8.dist-info/RECORD,,
213
+ re_common-10.0.9.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
214
+ re_common-10.0.9.dist-info/METADATA,sha256=7EH3E_6nA_nQ7s190Qrc4ylAMGaywO-s_z79css2utM,581
215
+ re_common-10.0.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
216
+ re_common-10.0.9.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
217
+ re_common-10.0.9.dist-info/RECORD,,