re-common 10.0.37__py3-none-any.whl → 10.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +219 -219
- re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
- re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
- re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
- re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
- re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/concurrency.py +35 -35
- re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
- re_common/v2/baselibrary/tools/data_processer/data_processer.py +508 -508
- re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
- re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
- re_common/v2/baselibrary/tools/dict_tools.py +44 -44
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
- re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
- re_common/v2/baselibrary/tools/list_tools.py +69 -69
- re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
- re_common/v2/baselibrary/utils/author_smi.py +361 -361
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +163 -163
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -77
- re_common/v2/baselibrary/utils/db.py +156 -156
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +186 -186
- re_common/v2/baselibrary/utils/string_clear.py +246 -246
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +271 -278
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/LICENSE +201 -201
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/METADATA +16 -16
- re_common-10.0.39.dist-info/RECORD +248 -0
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/WHEEL +1 -1
- re_common-10.0.37.dist-info/RECORD +0 -248
- {re_common-10.0.37.dist-info → re_common-10.0.39.dist-info}/top_level.txt +0 -0
|
@@ -1,186 +1,186 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
import unicodedata
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def is_ascii_alnum(char: str) -> bool:
    """
    Return True when ``char`` is made up only of ASCII letters and digits.

    ``str.isalnum()`` alone also accepts non-English letters such as Chinese
    characters, so the ASCII test is applied first. An empty string yields
    False.
    """
    if not char.isascii():
        return False
    return char.isalnum()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def is_all_english_chars(s):
    """
    Return True when ``s`` consists solely of ASCII letters (A-Z, a-z).

    An empty string returns False. ``re.fullmatch`` is used instead of a
    ``^...$`` pattern because ``$`` also matches just before a trailing
    newline, which allowed a string ending in a line break to pass.
    """
    return re.fullmatch(r"[A-Za-z]+", s) is not None
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def contains_chinese_chars(s):
    """
    Return True when ``s`` holds at least one character in U+3400..U+9FFF.

    Characters outside that single range (for example kana or the
    supplementary-plane ideograph extensions) do not count.
    """
    hit = re.search(r'[\u3400-\u9fff]', s)
    return hit is not None
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def is_empty(value):
    """
    Return True when ``value`` should be treated as empty.

    Treated as empty:
    - ``None``
    - a float NaN, and any scalar that pandas reports as missing when
      pandas is installed
    - a string that is blank after stripping whitespace
    - any other object exposing ``__len__`` whose length is 0

    Everything else returns False.
    """
    if value is None:
        return True

    # NaN is the only float that is not equal to itself; testing it here keeps
    # the answer the same whether or not pandas is installed.
    if isinstance(value, float) and value != value:
        return True

    # pandas is optional. Only a failed import is tolerated, so unrelated
    # errors are no longer hidden by a bare ``except``.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    # Restrict the missing-value check to scalars. For a list ``pd.isna``
    # answers element-wise, which made a one-element list such as ``[None]``
    # look empty and raised (silently swallowed) for longer ones.
    if pd is not None and pd.api.types.is_scalar(value) and pd.isna(value):
        return True

    if isinstance(value, str):
        return value.strip() == ""

    # Sized containers (list, dict, tuple, ...) are empty when their length is 0.
    if hasattr(value, "__len__"):
        return len(value) == 0

    return False
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class InvalidCharLengthError(Exception):
    """Custom exception for input whose character length is not exactly 1."""
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def is_single_cjk_char(char):
    """
    Report whether a single character is a Chinese/Japanese/Korean character.

    :param char: the single character to test
    :return: True when its code point lies in one of the ranges below, otherwise False
    :raises InvalidCharLengthError: when ``len(char)`` is not 1
    """
    if len(char) != 1:
        raise InvalidCharLengthError("输入的字符串长度必须为 1,请提供单个字符进行判断。")

    cp = ord(char)
    # Inclusive (first, last) code point bounds.
    blocks = (
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs
        (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
        (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
        (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
        (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
        (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
        (0x2CEB0, 0x2EBEF),  # CJK Unified Ideographs Extension F
        (0x30000, 0x3134F),  # CJK Unified Ideographs Extension G
        (0x31350, 0x323AF),  # CJK Unified Ideographs Extension H
        (0x3300, 0x33FF),  # CJK Compatibility
        (0xFE30, 0xFE4F),  # CJK Compatibility Forms
        (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
        (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
        (0x3105, 0x3129),  # Bopomofo
        (0x31A0, 0x31BF),  # Bopomofo Extended
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0x31F0, 0x31FF),  # Katakana Phonetic Extensions
        (0xAC00, 0xD7AF),  # Hangul Syllables
        (0x1100, 0x11FF),  # Hangul Jamo
        (0xA960, 0xA97F),  # Hangul Jamo Extended-A
        (0xD7B0, 0xD7FF),  # Hangul Jamo Extended-B
    )
    return any(first <= cp <= last for first, last in blocks)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def is_all_symbols(text):
    """
    Return True when ``text`` is non-empty and every character is a symbol.

    A character qualifies when its Unicode general category starts with
    ``P`` (punctuation) or ``S`` (symbol). Empty input returns False.
    """
    if not text:
        return False

    for ch in text:
        # The first letter of the category is the major class.
        if unicodedata.category(ch)[0] not in ("P", "S"):
            return False
    return True
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """
    Check whether ``sub_str`` occurs in ``long_str`` as a complete English word,
    i.e. not embedded inside a longer run of letters or digits.

    Args:
        sub_str: the English substring to look for
        long_str: the string being searched

    Returns:
        True when some occurrence of ``sub_str`` is delimited on both sides by
        the string edge or by a character other than an ASCII letter or digit;
        False when there is no such occurrence or either argument is empty.

    The search for ``sub_str`` itself is case-sensitive; only the boundary
    character test ignores case.
    """
    if not sub_str or not long_str:
        return False

    # The whole string being equal to the substring counts as a match.
    if sub_str == long_str:
        return True

    # Matches one character that is not a letter or digit.
    boundary = re.compile(r"[^a-z0-9]", re.IGNORECASE)
    sub_len = len(sub_str)
    total = len(long_str)

    pos = long_str.find(sub_str)
    while pos != -1:
        stop = pos + sub_len
        left_ok = pos == 0 or boundary.match(long_str[pos - 1]) is not None
        right_ok = stop == total or boundary.match(long_str[stop]) is not None
        if left_ok and right_ok:
            return True
        # Resume one character further on so overlapping occurrences are seen.
        pos = long_str.find(sub_str, pos + 1)

    return False
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def is_whole_word(sub_str: str, long_str: str) -> bool:
    """
    Check whether ``sub_str`` is a complete word inside ``long_str`` for both
    Chinese and English text.

    Chinese uses a plain containment test; English uses whole-word matching.

    Args:
        sub_str: the substring to look for (Chinese or English)
        long_str: the string being searched

    Returns:
        True when ``sub_str`` is a complete word, otherwise False.
    """
    # Text without Chinese characters goes through the English whole-word check.
    if not contains_chinese_chars(sub_str):
        return is_whole_word_en(sub_str, long_str)

    contained = sub_str in long_str
    starts_or_ends_ascii = is_ascii_alnum(sub_str[0]) or is_ascii_alnum(sub_str[-1])
    if starts_or_ends_ascii:
        # Mixed Chinese/English: only verify that no English word is cut off.
        return is_whole_word_en(sub_str, long_str)

    # A Chinese substring counts as a complete word as soon as it is contained.
    return contained
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import unicodedata
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def is_ascii_alnum(char: str) -> bool:
    """
    Return True when ``char`` is made up only of ASCII letters and digits.

    ``str.isalnum()`` alone also accepts non-English letters such as Chinese
    characters, so the ASCII test is applied first. An empty string yields
    False.
    """
    if not char.isascii():
        return False
    return char.isalnum()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def is_all_english_chars(s):
    """
    Return True when ``s`` consists solely of ASCII letters (A-Z, a-z).

    An empty string returns False. ``re.fullmatch`` is used instead of a
    ``^...$`` pattern because ``$`` also matches just before a trailing
    newline, which allowed a string ending in a line break to pass.
    """
    return re.fullmatch(r"[A-Za-z]+", s) is not None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def contains_chinese_chars(s):
    """
    Return True when ``s`` holds at least one character in U+3400..U+9FFF.

    Characters outside that single range (for example kana or the
    supplementary-plane ideograph extensions) do not count.
    """
    hit = re.search(r'[\u3400-\u9fff]', s)
    return hit is not None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def is_empty(value):
    """
    Return True when ``value`` should be treated as empty.

    Treated as empty:
    - ``None``
    - a float NaN, and any scalar that pandas reports as missing when
      pandas is installed
    - a string that is blank after stripping whitespace
    - any other object exposing ``__len__`` whose length is 0

    Everything else returns False.
    """
    if value is None:
        return True

    # NaN is the only float that is not equal to itself; testing it here keeps
    # the answer the same whether or not pandas is installed.
    if isinstance(value, float) and value != value:
        return True

    # pandas is optional. Only a failed import is tolerated, so unrelated
    # errors are no longer hidden by a bare ``except``.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    # Restrict the missing-value check to scalars. For a list ``pd.isna``
    # answers element-wise, which made a one-element list such as ``[None]``
    # look empty and raised (silently swallowed) for longer ones.
    if pd is not None and pd.api.types.is_scalar(value) and pd.isna(value):
        return True

    if isinstance(value, str):
        return value.strip() == ""

    # Sized containers (list, dict, tuple, ...) are empty when their length is 0.
    if hasattr(value, "__len__"):
        return len(value) == 0

    return False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class InvalidCharLengthError(Exception):
    """Custom exception for input whose character length is not exactly 1."""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def is_single_cjk_char(char):
    """
    Report whether a single character is a Chinese/Japanese/Korean character.

    :param char: the single character to test
    :return: True when its code point lies in one of the ranges below, otherwise False
    :raises InvalidCharLengthError: when ``len(char)`` is not 1
    """
    if len(char) != 1:
        raise InvalidCharLengthError("输入的字符串长度必须为 1,请提供单个字符进行判断。")

    cp = ord(char)
    # Inclusive (first, last) code point bounds.
    blocks = (
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs
        (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
        (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
        (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
        (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
        (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
        (0x2CEB0, 0x2EBEF),  # CJK Unified Ideographs Extension F
        (0x30000, 0x3134F),  # CJK Unified Ideographs Extension G
        (0x31350, 0x323AF),  # CJK Unified Ideographs Extension H
        (0x3300, 0x33FF),  # CJK Compatibility
        (0xFE30, 0xFE4F),  # CJK Compatibility Forms
        (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
        (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
        (0x3105, 0x3129),  # Bopomofo
        (0x31A0, 0x31BF),  # Bopomofo Extended
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0x31F0, 0x31FF),  # Katakana Phonetic Extensions
        (0xAC00, 0xD7AF),  # Hangul Syllables
        (0x1100, 0x11FF),  # Hangul Jamo
        (0xA960, 0xA97F),  # Hangul Jamo Extended-A
        (0xD7B0, 0xD7FF),  # Hangul Jamo Extended-B
    )
    return any(first <= cp <= last for first, last in blocks)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def is_all_symbols(text):
    """
    Return True when ``text`` is non-empty and every character is a symbol.

    A character qualifies when its Unicode general category starts with
    ``P`` (punctuation) or ``S`` (symbol). Empty input returns False.
    """
    if not text:
        return False

    for ch in text:
        # The first letter of the category is the major class.
        if unicodedata.category(ch)[0] not in ("P", "S"):
            return False
    return True
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """
    Check whether ``sub_str`` occurs in ``long_str`` as a complete English word,
    i.e. not embedded inside a longer run of letters or digits.

    Args:
        sub_str: the English substring to look for
        long_str: the string being searched

    Returns:
        True when some occurrence of ``sub_str`` is delimited on both sides by
        the string edge or by a character other than an ASCII letter or digit;
        False when there is no such occurrence or either argument is empty.

    The search for ``sub_str`` itself is case-sensitive; only the boundary
    character test ignores case.
    """
    if not sub_str or not long_str:
        return False

    # The whole string being equal to the substring counts as a match.
    if sub_str == long_str:
        return True

    # Matches one character that is not a letter or digit.
    boundary = re.compile(r"[^a-z0-9]", re.IGNORECASE)
    sub_len = len(sub_str)
    total = len(long_str)

    pos = long_str.find(sub_str)
    while pos != -1:
        stop = pos + sub_len
        left_ok = pos == 0 or boundary.match(long_str[pos - 1]) is not None
        right_ok = stop == total or boundary.match(long_str[stop]) is not None
        if left_ok and right_ok:
            return True
        # Resume one character further on so overlapping occurrences are seen.
        pos = long_str.find(sub_str, pos + 1)

    return False
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def is_whole_word(sub_str: str, long_str: str) -> bool:
    """
    Check whether ``sub_str`` is a complete word inside ``long_str`` for both
    Chinese and English text.

    Chinese uses a plain containment test; English uses whole-word matching.

    Args:
        sub_str: the substring to look for (Chinese or English)
        long_str: the string being searched

    Returns:
        True when ``sub_str`` is a complete word, otherwise False.
    """
    # Text without Chinese characters goes through the English whole-word check.
    if not contains_chinese_chars(sub_str):
        return is_whole_word_en(sub_str, long_str)

    contained = sub_str in long_str
    starts_or_ends_ascii = is_ascii_alnum(sub_str[0]) or is_ascii_alnum(sub_str[-1])
    if starts_or_ends_ascii:
        # Mixed Chinese/English: only verify that no English word is cut off.
        return is_whole_word_en(sub_str, long_str)

    # A Chinese substring counts as a complete word as soon as it is contained.
    return contained
|