re-common 10.0.39__py3-none-any.whl → 10.0.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +219 -219
  145. re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
  146. re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
  147. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
  148. re_common/v2/baselibrary/decorators/utils.py +59 -59
  149. re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
  150. re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
  151. re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
  152. re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
  153. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  154. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
  155. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  156. re_common/v2/baselibrary/tools/concurrency.py +35 -35
  157. re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
  158. re_common/v2/baselibrary/tools/data_processer/data_processer.py +508 -508
  159. re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
  160. re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
  161. re_common/v2/baselibrary/tools/dict_tools.py +44 -44
  162. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  163. re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
  164. re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
  165. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  166. re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
  167. re_common/v2/baselibrary/tools/list_tools.py +69 -69
  168. re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
  169. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  170. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  171. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  172. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  173. re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
  174. re_common/v2/baselibrary/utils/author_smi.py +361 -361
  175. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  176. re_common/v2/baselibrary/utils/basedict.py +37 -37
  177. re_common/v2/baselibrary/utils/basehdfs.py +163 -163
  178. re_common/v2/baselibrary/utils/basepika.py +180 -180
  179. re_common/v2/baselibrary/utils/basetime.py +77 -77
  180. re_common/v2/baselibrary/utils/db.py +156 -156
  181. re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
  182. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  183. re_common/v2/baselibrary/utils/mq.py +83 -83
  184. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  185. re_common/v2/baselibrary/utils/string_bool.py +186 -186
  186. re_common/v2/baselibrary/utils/string_clear.py +246 -246
  187. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  188. re_common/v2/baselibrary/utils/stringutils.py +271 -271
  189. re_common/vip/base_step_process.py +11 -11
  190. re_common/vip/baseencodeid.py +90 -90
  191. re_common/vip/changetaskname.py +28 -28
  192. re_common/vip/core_var.py +24 -24
  193. re_common/vip/mmh3Hash.py +89 -89
  194. re_common/vip/proxy/allproxys.py +127 -127
  195. re_common/vip/proxy/allproxys_thread.py +159 -159
  196. re_common/vip/proxy/cnki_proxy.py +153 -153
  197. re_common/vip/proxy/kuaidaili.py +87 -87
  198. re_common/vip/proxy/proxy_all.py +113 -113
  199. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  200. re_common/vip/proxy/wanfang_proxy.py +152 -152
  201. re_common/vip/proxy/wp_proxy_all.py +181 -181
  202. re_common/vip/read_rawid_to_txt.py +91 -91
  203. re_common/vip/title/__init__.py +5 -5
  204. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  205. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  206. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  207. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  208. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  209. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  210. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  211. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  212. re_common/vip/title/transform/__init__.py +10 -10
  213. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/LICENSE +201 -201
  214. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/METADATA +24 -16
  215. re_common-10.0.40.dist-info/RECORD +249 -0
  216. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/WHEEL +1 -1
  217. re_common-10.0.39.dist-info/RECORD +0 -248
  218. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/top_level.txt +0 -0
@@ -1,186 +1,186 @@
1
- import re
2
-
3
- import unicodedata
4
-
5
-
6
- def is_ascii_alnum(char: str) -> bool:
7
- # 精准判断是否为英文或数字字符(ASCII 范围)
8
- # char.isalnum() 字母或数字(Unicode)包含中文等非英文字符
9
- return char.isascii() and char.isalnum()
10
-
11
-
12
- def is_all_english_chars(s):
13
- return bool(re.match(r'^[A-Za-z]+$', s))
14
-
15
-
16
- def contains_chinese_chars(s):
17
- return bool(re.search(r'[\u3400-\u9fff]', s))
18
-
19
-
20
- def is_empty(value):
21
- """
22
- 判断一个值是否为空。
23
-
24
- 支持的类型:
25
- - None
26
- - 空字符串(去除空白后)
27
- - pandas 的 NaN
28
- - 其他可迭代类型(如列表、字典等)的长度为 0
29
- - 其他情况返回 False
30
- """
31
- # 如果是 None,直接返回 True
32
- if value is None:
33
- return True
34
-
35
- # 尝试处理 pandas 的 NaN
36
- try:
37
- import pandas as pd
38
- if pd.isna(value):
39
- return True
40
- except:
41
- pass # 如果没有安装 pandas,跳过
42
-
43
- # 如果是字符串,检查去除空白后是否为空
44
- if isinstance(value, str):
45
- return value.strip() == ""
46
-
47
- # 处理其他可迭代类型(如列表、字典等)
48
- if hasattr(value, "__len__"):
49
- return len(value) == 0
50
-
51
- # 默认情况下,非 None、非空类型返回 False
52
- return False
53
-
54
-
55
- class InvalidCharLengthError(Exception):
56
- """自定义异常类,用于处理输入字符长度不为 1 的情况"""
57
- pass
58
-
59
-
60
- def is_single_cjk_char(char):
61
- """
62
- 判断单个字符是否为中日韩字符
63
- :param char: 要判断的单个字符
64
- :return: 如果是中日韩字符返回 True,否则返回 False
65
- """
66
- # 检查输入字符的长度
67
- if len(char) != 1:
68
- raise InvalidCharLengthError("输入的字符串长度必须为 1,请提供单个字符进行判断。")
69
- code_point = ord(char)
70
- # 中日韩统一表意文字
71
- ranges = [
72
- (0x4E00, 0x9FFF), # CJK 统一表意符号
73
- (0x3400, 0x4DBF), # CJK 统一表意符号扩展 A
74
- (0x20000, 0x2A6DF), # CJK 统一表意符号扩展 B
75
- (0x2A700, 0x2B73F), # CJK 统一表意符号扩展 C
76
- (0x2B740, 0x2B81F), # CJK 统一表意符号扩展 D
77
- (0x2B820, 0x2CEAF), # CJK 统一表意符号扩展 E
78
- (0x2CEB0, 0x2EBEF), # CJK 统一表意符号扩展 F
79
- (0x30000, 0x3134F), # CJK 统一表意符号扩展 G
80
- (0x31350, 0x323AF), # CJK 统一表意符号扩展 H
81
- (0x3300, 0x33FF), # CJK 兼容符号
82
- (0xFE30, 0xFE4F), # CJK 兼容形式
83
- (0xF900, 0xFAFF), # CJK 兼容表意符号
84
- (0x2F800, 0x2FA1F), # CJK 兼容表意符号补充
85
- (0x3105, 0x3129), # 注音字母
86
- (0x31A0, 0x31BF), # 注音字母扩展
87
- (0x3040, 0x309F), # 平假名
88
- (0x30A0, 0x30FF), # 片假名
89
- (0x31F0, 0x31FF), # 片假名扩展
90
- (0xAC00, 0xD7AF), # 韩文音节
91
- (0x1100, 0x11FF), # 韩文字母
92
- (0xA960, 0xA97F), # 韩文字母扩展 A
93
- (0xD7B0, 0xD7FF), # 韩文字母扩展 B
94
- ]
95
- for start, end in ranges:
96
- if start <= code_point <= end:
97
- return True
98
- return False
99
-
100
-
101
- def is_all_symbols(text):
102
- # 是否全是符号
103
- # 如果字符串为空,返回 False
104
- if not text:
105
- return False
106
-
107
- # 检查每个字符是否属于符号类别
108
- return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
109
-
110
-
111
- def is_whole_word_en(sub_str: str, long_str: str) -> bool:
112
- """
113
- 判断 sub_str 是否作为 long_str 中的一个完整英文单词(不被其他单词嵌套)。
114
-
115
- 参数:
116
- sub_str: 要搜索的英文子串
117
- long_str: 被搜索的字符串
118
-
119
- 返回:
120
- True 表示 sub_str 是一个完整单词;False 表示是部分单词或不匹配。
121
- """
122
- # 用于 忽略大小写 进行匹配
123
- regex_pattern = re.compile(r"[^a-z0-9]", re.IGNORECASE) # 用于判断非字母数字字符
124
-
125
- if not sub_str or not long_str:
126
- return False
127
-
128
- # 检查整段是否完全等于 sub_str
129
- if long_str == sub_str:
130
- return True
131
-
132
- # 遍历所有 sub_str 的出现位置
133
- index = 0
134
- while index < len(long_str):
135
- # 从字符串 long_str 的第 index 个位置开始,查找子串 sub_str 第一次出现的位置,并把它赋值给 index。
136
- index = long_str.find(sub_str, index)
137
- if index == -1:
138
- break
139
-
140
- # 检查 sub_str 前一个字符(如果有)是否为非字母数字
141
- if index == 0:
142
- is_start = True
143
- else:
144
- is_start = bool(regex_pattern.match(long_str[index - 1]))
145
-
146
- # 检查 sub_str 后一个字符(如果有)是否为非字母数字
147
- end_index = index + len(sub_str)
148
- if end_index == len(long_str):
149
- is_end = True
150
- else:
151
- is_end = bool(regex_pattern.match(long_str[end_index]))
152
-
153
- if is_start and is_end:
154
- return True
155
-
156
- # 移动索引继续查找
157
- index += 1
158
-
159
- return False
160
-
161
-
162
- def is_whole_word(sub_str: str, long_str: str) -> bool:
163
- """
164
- 判断 sub_str 是否为 long_str 中的一个完整词(适配中英文)。
165
- 中文采用“包含”判断,英文采用完整词匹配。
166
-
167
- 参数:
168
- sub_str: 要搜索的子串(中英文均可)
169
- long_str: 被搜索的字符串
170
-
171
- 返回:
172
- True 表示 sub_str 是一个完整词;False 否则。
173
- """
174
- if contains_chinese_chars(sub_str):
175
- # 子字符串完全包含在长字符串
176
- is_contain = sub_str in long_str
177
- # 是否是字母数字
178
- if is_ascii_alnum(sub_str[0]) or is_ascii_alnum(sub_str[-1]):
179
- # 表示中英文混合 看是否是截断单词即可
180
- return is_whole_word_en(sub_str, long_str)
181
- else:
182
- # 中文子串只要被包含即可视为“完整词”
183
- return is_contain
184
- else:
185
- # 英文使用完整单词判断逻辑
186
- return is_whole_word_en(sub_str, long_str)
1
+ import re
2
+
3
+ import unicodedata
4
+
5
+
6
+ def is_ascii_alnum(char: str) -> bool:
7
+ # 精准判断是否为英文或数字字符(ASCII 范围)
8
+ # char.isalnum() 字母或数字(Unicode)包含中文等非英文字符
9
+ return char.isascii() and char.isalnum()
10
+
11
+
12
+ def is_all_english_chars(s):
13
+ return bool(re.match(r'^[A-Za-z]+$', s))
14
+
15
+
16
+ def contains_chinese_chars(s):
17
+ return bool(re.search(r'[\u3400-\u9fff]', s))
18
+
19
+
20
+ def is_empty(value):
21
+ """
22
+ 判断一个值是否为空。
23
+
24
+ 支持的类型:
25
+ - None
26
+ - 空字符串(去除空白后)
27
+ - pandas 的 NaN
28
+ - 其他可迭代类型(如列表、字典等)的长度为 0
29
+ - 其他情况返回 False
30
+ """
31
+ # 如果是 None,直接返回 True
32
+ if value is None:
33
+ return True
34
+
35
+ # 尝试处理 pandas 的 NaN
36
+ try:
37
+ import pandas as pd
38
+ if pd.isna(value):
39
+ return True
40
+ except:
41
+ pass # 如果没有安装 pandas,跳过
42
+
43
+ # 如果是字符串,检查去除空白后是否为空
44
+ if isinstance(value, str):
45
+ return value.strip() == ""
46
+
47
+ # 处理其他可迭代类型(如列表、字典等)
48
+ if hasattr(value, "__len__"):
49
+ return len(value) == 0
50
+
51
+ # 默认情况下,非 None、非空类型返回 False
52
+ return False
53
+
54
+
55
+ class InvalidCharLengthError(Exception):
56
+ """自定义异常类,用于处理输入字符长度不为 1 的情况"""
57
+ pass
58
+
59
+
60
+ def is_single_cjk_char(char):
61
+ """
62
+ 判断单个字符是否为中日韩字符
63
+ :param char: 要判断的单个字符
64
+ :return: 如果是中日韩字符返回 True,否则返回 False
65
+ """
66
+ # 检查输入字符的长度
67
+ if len(char) != 1:
68
+ raise InvalidCharLengthError("输入的字符串长度必须为 1,请提供单个字符进行判断。")
69
+ code_point = ord(char)
70
+ # 中日韩统一表意文字
71
+ ranges = [
72
+ (0x4E00, 0x9FFF), # CJK 统一表意符号
73
+ (0x3400, 0x4DBF), # CJK 统一表意符号扩展 A
74
+ (0x20000, 0x2A6DF), # CJK 统一表意符号扩展 B
75
+ (0x2A700, 0x2B73F), # CJK 统一表意符号扩展 C
76
+ (0x2B740, 0x2B81F), # CJK 统一表意符号扩展 D
77
+ (0x2B820, 0x2CEAF), # CJK 统一表意符号扩展 E
78
+ (0x2CEB0, 0x2EBEF), # CJK 统一表意符号扩展 F
79
+ (0x30000, 0x3134F), # CJK 统一表意符号扩展 G
80
+ (0x31350, 0x323AF), # CJK 统一表意符号扩展 H
81
+ (0x3300, 0x33FF), # CJK 兼容符号
82
+ (0xFE30, 0xFE4F), # CJK 兼容形式
83
+ (0xF900, 0xFAFF), # CJK 兼容表意符号
84
+ (0x2F800, 0x2FA1F), # CJK 兼容表意符号补充
85
+ (0x3105, 0x3129), # 注音字母
86
+ (0x31A0, 0x31BF), # 注音字母扩展
87
+ (0x3040, 0x309F), # 平假名
88
+ (0x30A0, 0x30FF), # 片假名
89
+ (0x31F0, 0x31FF), # 片假名扩展
90
+ (0xAC00, 0xD7AF), # 韩文音节
91
+ (0x1100, 0x11FF), # 韩文字母
92
+ (0xA960, 0xA97F), # 韩文字母扩展 A
93
+ (0xD7B0, 0xD7FF), # 韩文字母扩展 B
94
+ ]
95
+ for start, end in ranges:
96
+ if start <= code_point <= end:
97
+ return True
98
+ return False
99
+
100
+
101
+ def is_all_symbols(text):
102
+ # 是否全是符号
103
+ # 如果字符串为空,返回 False
104
+ if not text:
105
+ return False
106
+
107
+ # 检查每个字符是否属于符号类别
108
+ return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
109
+
110
+
111
+ def is_whole_word_en(sub_str: str, long_str: str) -> bool:
112
+ """
113
+ 判断 sub_str 是否作为 long_str 中的一个完整英文单词(不被其他单词嵌套)。
114
+
115
+ 参数:
116
+ sub_str: 要搜索的英文子串
117
+ long_str: 被搜索的字符串
118
+
119
+ 返回:
120
+ True 表示 sub_str 是一个完整单词;False 表示是部分单词或不匹配。
121
+ """
122
+ # 用于 忽略大小写 进行匹配
123
+ regex_pattern = re.compile(r"[^a-z0-9]", re.IGNORECASE) # 用于判断非字母数字字符
124
+
125
+ if not sub_str or not long_str:
126
+ return False
127
+
128
+ # 检查整段是否完全等于 sub_str
129
+ if long_str == sub_str:
130
+ return True
131
+
132
+ # 遍历所有 sub_str 的出现位置
133
+ index = 0
134
+ while index < len(long_str):
135
+ # 从字符串 long_str 的第 index 个位置开始,查找子串 sub_str 第一次出现的位置,并把它赋值给 index。
136
+ index = long_str.find(sub_str, index)
137
+ if index == -1:
138
+ break
139
+
140
+ # 检查 sub_str 前一个字符(如果有)是否为非字母数字
141
+ if index == 0:
142
+ is_start = True
143
+ else:
144
+ is_start = bool(regex_pattern.match(long_str[index - 1]))
145
+
146
+ # 检查 sub_str 后一个字符(如果有)是否为非字母数字
147
+ end_index = index + len(sub_str)
148
+ if end_index == len(long_str):
149
+ is_end = True
150
+ else:
151
+ is_end = bool(regex_pattern.match(long_str[end_index]))
152
+
153
+ if is_start and is_end:
154
+ return True
155
+
156
+ # 移动索引继续查找
157
+ index += 1
158
+
159
+ return False
160
+
161
+
162
+ def is_whole_word(sub_str: str, long_str: str) -> bool:
163
+ """
164
+ 判断 sub_str 是否为 long_str 中的一个完整词(适配中英文)。
165
+ 中文采用“包含”判断,英文采用完整词匹配。
166
+
167
+ 参数:
168
+ sub_str: 要搜索的子串(中英文均可)
169
+ long_str: 被搜索的字符串
170
+
171
+ 返回:
172
+ True 表示 sub_str 是一个完整词;False 否则。
173
+ """
174
+ if contains_chinese_chars(sub_str):
175
+ # 子字符串完全包含在长字符串
176
+ is_contain = sub_str in long_str
177
+ # 是否是字母数字
178
+ if is_ascii_alnum(sub_str[0]) or is_ascii_alnum(sub_str[-1]):
179
+ # 表示中英文混合 看是否是截断单词即可
180
+ return is_whole_word_en(sub_str, long_str)
181
+ else:
182
+ # 中文子串只要被包含即可视为“完整词”
183
+ return is_contain
184
+ else:
185
+ # 英文使用完整单词判断逻辑
186
+ return is_whole_word_en(sub_str, long_str)