re-common 10.0.19__py3-none-any.whl → 10.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +79 -79
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/dict_tools.py +37 -37
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/list_tools.py +65 -65
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/author_smi.py +360 -360
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +161 -127
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -0
- re_common/v2/baselibrary/utils/db.py +38 -38
- re_common/v2/baselibrary/utils/json_cls.py +16 -11
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +149 -149
- re_common/v2/baselibrary/utils/string_clear.py +204 -202
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +213 -213
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/LICENSE +201 -201
- {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/METADATA +16 -24
- re_common-10.0.22.dist-info/RECORD +227 -0
- {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/WHEEL +1 -1
- re_common-10.0.19.dist-info/RECORD +0 -226
- {re_common-10.0.19.dist-info → re_common-10.0.22.dist-info}/top_level.txt +0 -0
|
@@ -1,149 +1,149 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
import unicodedata
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def is_all_english_chars(s):
    """Return True when ``s`` is a non-empty run of ASCII letters only.

    ``re.fullmatch`` is used instead of ``re.match`` with ``^...$`` because
    ``$`` also matches just before a trailing line feed, which would let a
    string such as "abc" followed by a newline pass the check.

    :param s: The string to check.
    :return: True if ``s`` is non-empty and every character is in
        ``A-Z`` or ``a-z``; otherwise False.
    """
    return bool(re.fullmatch(r'[A-Za-z]+', s))
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def contains_chinese_chars(s):
    """Return True when ``s`` contains at least one Chinese ideograph.

    Code points treated as Chinese:

    - U+3400-U+4DBF    CJK Unified Ideographs Extension A
    - U+4E00-U+9FFF    CJK Unified Ideographs
    - U+F900-U+FAFF    CJK Compatibility Ideographs
    - U+20000-U+323AF  supplementary-plane ideograph extensions

    The Yijing hexagram symbols (U+4DC0-U+4DFF) sit between Extension A and
    the main block; they are symbols, not ideographs, so they are excluded.

    :param s: The string to scan.
    :return: True if any character of ``s`` falls in one of the ranges above.
    """
    return bool(re.search(
        r'[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\U00020000-\U000323af]', s))
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def is_empty(value):
    """Decide whether ``value`` counts as empty.

    A value is empty when it is:

    - ``None``
    - a string that is blank after stripping whitespace
    - a float NaN (detected without pandas)
    - a scalar that ``pandas.isna`` reports as missing (``NaT``, ``pd.NA``),
      when pandas is installed
    - any object exposing ``__len__`` whose length is 0

    Everything else is not empty.

    :param value: Any object.
    :return: True if ``value`` is empty by the rules above, otherwise False.
    """
    if value is None:
        return True

    # Strings are judged purely on their stripped content.
    if isinstance(value, str):
        return value.strip() == ""

    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return True

    # pandas is optional; skip its missing-value check when it is absent.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    # Only scalars are passed to pd.isna: for containers it returns an
    # element-wise array, whose truth value would wrongly mark a one-item
    # container such as [nan] or [None] as empty.
    if pd is not None and pd.api.types.is_scalar(value):
        try:
            if pd.isna(value):
                return True
        except (TypeError, ValueError):
            pass  # best effort: fall through to the length check

    # Sized containers (list, dict, set, tuple, ...) are empty at length 0.
    if hasattr(value, "__len__"):
        return len(value) == 0

    return False
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class InvalidCharLengthError(Exception):
    """Raised when an input expected to be a single character has a length other than 1."""
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# Inclusive code point ranges treated as CJK (Chinese, Japanese, Korean).
# Kept at module level so the table is built once, not on every call.
_CJK_CODE_POINT_RANGES = (
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
    (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
    (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
    (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
    (0x2CEB0, 0x2EBEF),  # CJK Unified Ideographs Extension F
    (0x2EBF0, 0x2EE5F),  # CJK Unified Ideographs Extension I
    (0x30000, 0x3134F),  # CJK Unified Ideographs Extension G
    (0x31350, 0x323AF),  # CJK Unified Ideographs Extension H
    (0x3300, 0x33FF),    # CJK Compatibility
    (0xFE30, 0xFE4F),    # CJK Compatibility Forms
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
    (0x3105, 0x312F),    # Bopomofo letters (through U+312F)
    (0x31A0, 0x31BF),    # Bopomofo Extended
    (0x3040, 0x309F),    # Hiragana
    (0x30A0, 0x30FF),    # Katakana
    (0x31F0, 0x31FF),    # Katakana Phonetic Extensions
    (0xAC00, 0xD7AF),    # Hangul Syllables
    (0x1100, 0x11FF),    # Hangul Jamo
    (0xA960, 0xA97F),    # Hangul Jamo Extended-A
    (0xD7B0, 0xD7FF),    # Hangul Jamo Extended-B
)


def is_single_cjk_char(char):
    """Tell whether a single character is a CJK character.

    The character is CJK when its code point lies inside one of the
    inclusive ranges in ``_CJK_CODE_POINT_RANGES`` (ideographs, Bopomofo,
    kana and Hangul). CJK punctuation (U+3000-U+303F) is not included.

    :param char: The single character to classify.
    :return: True if ``char`` is a CJK character, otherwise False.
    :raises InvalidCharLengthError: If ``len(char)`` is not exactly 1.
    """
    if len(char) != 1:
        raise InvalidCharLengthError("输入的字符串长度必须为 1,请提供单个字符进行判断。")
    code_point = ord(char)
    return any(low <= code_point <= high for low, high in _CJK_CODE_POINT_RANGES)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def is_all_symbols(text):
    """Return True when ``text`` is non-empty and consists only of symbols.

    A character counts as a symbol when its Unicode general category starts
    with ``P`` (punctuation) or ``S`` (symbol). Whitespace, letters and
    digits therefore make the result False.

    :param text: The string to check.
    :return: False for an empty (falsy) ``text``; otherwise True only if
        every character is punctuation or a symbol.
    """
    # An empty string has no symbols, so it is not "all symbols".
    if not text:
        return False

    return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
# A word boundary is any character that is not an ASCII letter or digit.
_NON_ALNUM_RE = re.compile(r"[^A-Za-z0-9]")


def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """Tell whether ``sub_str`` occurs in ``long_str`` as a complete word.

    An occurrence is a complete word when it is not glued to an ASCII letter
    or digit on either side: each side must be the edge of ``long_str`` or a
    character outside ``A-Z``, ``a-z`` and ``0-9``. Every occurrence is
    examined, so an earlier embedded hit (``"concat"``) does not hide a later
    standalone one (``"the cat"``).

    :param sub_str: The word to look for.
    :param long_str: The text to search in.
    :return: True if at least one occurrence of ``sub_str`` is a complete
        word; False if there is none or if either argument is empty.
    """
    if not sub_str or not long_str:
        return False

    sub_len = len(sub_str)
    text_len = len(long_str)

    index = long_str.find(sub_str)
    while index >= 0:
        end = index + sub_len
        # The left side is clean at the start of the text or after a boundary.
        starts_clean = index == 0 or bool(_NON_ALNUM_RE.match(long_str[index - 1]))
        # The right side is clean at the end of the text or before a boundary.
        ends_clean = end == text_len or bool(_NON_ALNUM_RE.match(long_str[end]))
        if starts_clean and ends_clean:
            return True
        # Move on by one character so overlapping occurrences are also seen.
        index = long_str.find(sub_str, index + 1)

    return False
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def is_whole_word(sub_str: str, long_str: str) -> bool:
    """Tell whether ``sub_str`` should be treated as a whole word of ``long_str``.

    If ``sub_str`` contains a Chinese character the answer is always True;
    otherwise the decision is delegated to ``is_whole_word_en``.

    :param sub_str: The word to look for.
    :param long_str: The text to search in.
    :return: True if ``sub_str`` contains Chinese characters or is a complete
        English word inside ``long_str``, otherwise False.
    """
    # NOTE(review): the Chinese branch never checks that sub_str actually
    # occurs in long_str - presumably callers have already matched it; confirm.
    return contains_chinese_chars(sub_str) or is_whole_word_en(sub_str, long_str)
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import unicodedata
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def is_all_english_chars(s):
    """Return True when ``s`` is a non-empty run of ASCII letters only.

    ``re.fullmatch`` is used instead of ``re.match`` with ``^...$`` because
    ``$`` also matches just before a trailing line feed, which would let a
    string such as "abc" followed by a newline pass the check.

    :param s: The string to check.
    :return: True if ``s`` is non-empty and every character is in
        ``A-Z`` or ``a-z``; otherwise False.
    """
    return bool(re.fullmatch(r'[A-Za-z]+', s))
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def contains_chinese_chars(s):
    """Return True when ``s`` contains at least one Chinese ideograph.

    Code points treated as Chinese:

    - U+3400-U+4DBF    CJK Unified Ideographs Extension A
    - U+4E00-U+9FFF    CJK Unified Ideographs
    - U+F900-U+FAFF    CJK Compatibility Ideographs
    - U+20000-U+323AF  supplementary-plane ideograph extensions

    The Yijing hexagram symbols (U+4DC0-U+4DFF) sit between Extension A and
    the main block; they are symbols, not ideographs, so they are excluded.

    :param s: The string to scan.
    :return: True if any character of ``s`` falls in one of the ranges above.
    """
    return bool(re.search(
        r'[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\U00020000-\U000323af]', s))
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_empty(value):
    """Decide whether ``value`` counts as empty.

    A value is empty when it is:

    - ``None``
    - a string that is blank after stripping whitespace
    - a float NaN (detected without pandas)
    - a scalar that ``pandas.isna`` reports as missing (``NaT``, ``pd.NA``),
      when pandas is installed
    - any object exposing ``__len__`` whose length is 0

    Everything else is not empty.

    :param value: Any object.
    :return: True if ``value`` is empty by the rules above, otherwise False.
    """
    if value is None:
        return True

    # Strings are judged purely on their stripped content.
    if isinstance(value, str):
        return value.strip() == ""

    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return True

    # pandas is optional; skip its missing-value check when it is absent.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    # Only scalars are passed to pd.isna: for containers it returns an
    # element-wise array, whose truth value would wrongly mark a one-item
    # container such as [nan] or [None] as empty.
    if pd is not None and pd.api.types.is_scalar(value):
        try:
            if pd.isna(value):
                return True
        except (TypeError, ValueError):
            pass  # best effort: fall through to the length check

    # Sized containers (list, dict, set, tuple, ...) are empty at length 0.
    if hasattr(value, "__len__"):
        return len(value) == 0

    return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InvalidCharLengthError(Exception):
    """Raised when an input expected to be a single character has a length other than 1."""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Inclusive code point ranges treated as CJK (Chinese, Japanese, Korean).
# Kept at module level so the table is built once, not on every call.
_CJK_CODE_POINT_RANGES = (
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
    (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
    (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
    (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
    (0x2CEB0, 0x2EBEF),  # CJK Unified Ideographs Extension F
    (0x2EBF0, 0x2EE5F),  # CJK Unified Ideographs Extension I
    (0x30000, 0x3134F),  # CJK Unified Ideographs Extension G
    (0x31350, 0x323AF),  # CJK Unified Ideographs Extension H
    (0x3300, 0x33FF),    # CJK Compatibility
    (0xFE30, 0xFE4F),    # CJK Compatibility Forms
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
    (0x3105, 0x312F),    # Bopomofo letters (through U+312F)
    (0x31A0, 0x31BF),    # Bopomofo Extended
    (0x3040, 0x309F),    # Hiragana
    (0x30A0, 0x30FF),    # Katakana
    (0x31F0, 0x31FF),    # Katakana Phonetic Extensions
    (0xAC00, 0xD7AF),    # Hangul Syllables
    (0x1100, 0x11FF),    # Hangul Jamo
    (0xA960, 0xA97F),    # Hangul Jamo Extended-A
    (0xD7B0, 0xD7FF),    # Hangul Jamo Extended-B
)


def is_single_cjk_char(char):
    """Tell whether a single character is a CJK character.

    The character is CJK when its code point lies inside one of the
    inclusive ranges in ``_CJK_CODE_POINT_RANGES`` (ideographs, Bopomofo,
    kana and Hangul). CJK punctuation (U+3000-U+303F) is not included.

    :param char: The single character to classify.
    :return: True if ``char`` is a CJK character, otherwise False.
    :raises InvalidCharLengthError: If ``len(char)`` is not exactly 1.
    """
    if len(char) != 1:
        raise InvalidCharLengthError("输入的字符串长度必须为 1,请提供单个字符进行判断。")
    code_point = ord(char)
    return any(low <= code_point <= high for low, high in _CJK_CODE_POINT_RANGES)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def is_all_symbols(text):
    """Return True when ``text`` is non-empty and consists only of symbols.

    A character counts as a symbol when its Unicode general category starts
    with ``P`` (punctuation) or ``S`` (symbol). Whitespace, letters and
    digits therefore make the result False.

    :param text: The string to check.
    :return: False for an empty (falsy) ``text``; otherwise True only if
        every character is punctuation or a symbol.
    """
    # An empty string has no symbols, so it is not "all symbols".
    if not text:
        return False

    return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# A word boundary is any character that is not an ASCII letter or digit.
_NON_ALNUM_RE = re.compile(r"[^A-Za-z0-9]")


def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """Tell whether ``sub_str`` occurs in ``long_str`` as a complete word.

    An occurrence is a complete word when it is not glued to an ASCII letter
    or digit on either side: each side must be the edge of ``long_str`` or a
    character outside ``A-Z``, ``a-z`` and ``0-9``. Every occurrence is
    examined, so an earlier embedded hit (``"concat"``) does not hide a later
    standalone one (``"the cat"``).

    :param sub_str: The word to look for.
    :param long_str: The text to search in.
    :return: True if at least one occurrence of ``sub_str`` is a complete
        word; False if there is none or if either argument is empty.
    """
    if not sub_str or not long_str:
        return False

    sub_len = len(sub_str)
    text_len = len(long_str)

    index = long_str.find(sub_str)
    while index >= 0:
        end = index + sub_len
        # The left side is clean at the start of the text or after a boundary.
        starts_clean = index == 0 or bool(_NON_ALNUM_RE.match(long_str[index - 1]))
        # The right side is clean at the end of the text or before a boundary.
        ends_clean = end == text_len or bool(_NON_ALNUM_RE.match(long_str[end]))
        if starts_clean and ends_clean:
            return True
        # Move on by one character so overlapping occurrences are also seen.
        index = long_str.find(sub_str, index + 1)

    return False
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def is_whole_word(sub_str: str, long_str: str) -> bool:
    """Tell whether ``sub_str`` should be treated as a whole word of ``long_str``.

    If ``sub_str`` contains a Chinese character the answer is always True;
    otherwise the decision is delegated to ``is_whole_word_en``.

    :param sub_str: The word to look for.
    :param long_str: The text to search in.
    :return: True if ``sub_str`` contains Chinese characters or is a complete
        English word inside ``long_str``, otherwise False.
    """
    # NOTE(review): the Chinese branch never checks that sub_str actually
    # occurs in long_str - presumably callers have already matched it; confirm.
    return contains_chinese_chars(sub_str) or is_whole_word_en(sub_str, long_str)
|