re-common 10.0.42__py3-none-any.whl → 10.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,17 @@
1
+ import gzip
2
+ import io
3
+ from typing import Protocol
4
+
1
5
  import aioboto3
2
6
  import aiofiles
3
7
  from aiobotocore.config import AioConfig
4
8
 
5
9
 
10
class AsyncReadable(Protocol):
    """Structural type for any object that offers an awaitable ``read``."""

    async def read(self, n: int = -1) -> bytes:
        """Read and return bytes; ``n`` defaults to ``-1``."""
        ...
13
+
14
+
6
15
  # config = AioConfig(connect_timeout=600000, read_timeout=600000, retries={'max_attempts': 3},
7
16
  # max_pool_connections=10)
8
17
 
@@ -10,6 +19,15 @@ class BaseAioBoto3(object):
10
19
 
11
20
  def __init__(self, aws_access_key_id, aws_secret_access_key, endpoint_url,
12
21
  config=AioConfig(max_pool_connections=10)):
22
+ """
23
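+ Store the credentials and endpoint used to create aioboto3 S3 clients (original note: Huawei Cloud OBS).
24
+
25
+ Args:
26
+ aws_access_key_id: access key ID handed to aioboto3.Session
27
+ aws_secret_access_key: secret access key handed to aioboto3.Session
28
+ endpoint_url: endpoint URL of the object storage service (required, no default)
29
+ config: AioConfig used for created clients; defaults to AioConfig(max_pool_connections=10)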
30
+ """
13
31
  self.aws_access_key_id = aws_access_key_id
14
32
  self.aws_secret_access_key = aws_secret_access_key
15
33
  self.endpoint_url = endpoint_url
@@ -20,7 +38,7 @@ class BaseAioBoto3(object):
20
38
  if self.boto_session is None:
21
39
  self.boto_session = aioboto3.Session(
22
40
  aws_access_key_id=self.aws_access_key_id,
23
- aws_secret_access_key=self.aws_secret_access_key,
41
+ aws_secret_access_key=self.aws_secret_access_key
24
42
  )
25
43
 
26
44
  async def read_minio_data(self, bucket, key):
@@ -30,6 +48,10 @@ class BaseAioBoto3(object):
30
48
  result = await s3_ob["Body"].read()
31
49
  return result
32
50
 
51
def ungzip(self, raw_bytes, encoding="utf-8"):
    """Decompress gzip data held in memory and decode it to text.

    Args:
        raw_bytes: gzip-compressed bytes.
        encoding: codec used to decode the decompressed payload
            (default ``"utf-8"``).

    Returns:
        The decompressed content as ``str``.
    """
    buffer = io.BytesIO(raw_bytes)
    with gzip.GzipFile(fileobj=buffer) as stream:
        payload = stream.read()
    return payload.decode(encoding)
54
+
33
55
  # 异步下载大文件
34
56
  async def download_file(self, bucket: str, key: str, local_path: str):
35
57
  await self.initialize_class_variable()
@@ -46,3 +68,55 @@ class BaseAioBoto3(object):
46
68
  await f.write(chunk)
47
69
 
48
70
  return local_path
71
+
72
async def list_files(self, bucket: str, prefix: str, recursive: bool = True):
    """
    List the object keys stored under a "directory" (prefix) of a bucket.

    Example result for a prefix holding a single file:
    ['server_data/api-title-roc/py_full_organ_dic/',
     'server_data/api-title-roc/py_full_organ_dic/part-00000.gz']

    :param bucket: bucket name
    :param prefix: key prefix, e.g. 'server_data/api-title-roc/'
    :param recursive: when False, Delimiter="/" is sent with the request;
        only keys from the response's "Contents" are collected either way
    :return: list of object keys gathered from every result page
    """
    await self.initialize_class_variable()

    # Parameters shared by every page request; the delimiter emulates
    # directories when a non-recursive listing is requested.
    base_request = {"Bucket": bucket, "Prefix": prefix}
    if not recursive:
        base_request["Delimiter"] = "/"

    found = []
    async with self.boto_session.client(
            "s3",
            endpoint_url=self.endpoint_url,
            config=self.config
    ) as s3:
        next_token = None
        has_more = True
        while has_more:
            page_request = dict(base_request)
            # Cursor pointing at the next page, once the service supplied one.
            if next_token:
                page_request["ContinuationToken"] = next_token

            page = await s3.list_objects_v2(**page_request)
            found.extend(entry["Key"] for entry in page.get("Contents", []))

            # IsTruncated signals that another page is available.
            has_more = bool(page.get("IsTruncated"))
            if has_more:
                next_token = page.get("NextContinuationToken")

    return found
@@ -0,0 +1,80 @@
1
+ # pip install pycryptodome==3.10.1
2
+ # pip install esdk-obs-python
3
+ # 引入模块
4
+ import os
5
+
6
+ from obs import ObsClient, GetObjectHeader
7
+
8
+
9
class BaseObsClient(object):
    """Small synchronous wrapper around the OBS SDK ``ObsClient``.

    Every operation method returns a ``(ok, resp)`` tuple: ``ok`` is True when
    ``resp.status`` is below 300 (the SDK call succeeded) and ``resp`` is the
    untouched SDK response.
    """

    def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
        """Store the credentials; build the SDK client only when all are given.

        Args:
            aws_access_key_id: access key ID passed to ``ObsClient``.
            aws_secret_access_key: secret access key passed to ``ObsClient``.
            endpoint_url: server address passed to ``ObsClient``.
        """
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.endpoint_url = endpoint_url
        self.client = None
        if self.aws_access_key_id and self.aws_secret_access_key and self.endpoint_url:
            self.get_client()

    def get_client(self):
        """(Re)create ``self.client`` from the stored settings and return ``self``."""
        self.client = ObsClient(access_key_id=self.aws_access_key_id,
                                secret_access_key=self.aws_secret_access_key,
                                server=self.endpoint_url)
        return self

    def close(self):
        """Close the SDK client; a no-op when no client was ever created."""
        # The client stays None when the instance was built without complete
        # credentials, so closing must not blow up in that case.
        if self.client is not None:
            self.client.close()

    @staticmethod
    def _as_result(resp):
        """Pair the response with a success flag (status below 300 means success)."""
        return resp.status < 300, resp

    def put_object(self, bucket_name, objectKey, body):
        """Write ``body`` directly as the content of an object.

        Args:
            bucket_name: target bucket.
            objectKey: key of the object to write.
            body: content to upload.

        Returns:
            ``(ok, resp)`` as described on the class.
        """
        return self._as_result(self.client.putContent(bucket_name, objectKey, body))

    def download_memobj(self, bucket_name, objectKey):
        """Download an object as a binary stream held in memory.

        ``loadStreamInMemory=True`` makes the SDK ignore any download path and
        keep the bytes in memory.

        Returns:
            ``(ok, resp)`` as described on the class.
        """
        resp = self.client.getObject(bucketName=bucket_name, objectKey=objectKey, loadStreamInMemory=True)
        return self._as_result(resp)

    def download_file(self, bucket_name, objectKey, downloadPath):
        """Download an object to the local path ``downloadPath``.

        Returns:
            ``(ok, resp)`` as described on the class.
        """
        headers = GetObjectHeader()
        resp = self.client.getObject(bucket_name, objectKey, downloadPath, headers=headers)
        return self._as_result(resp)

    def list_prefixes(self, bucket_name, prefix, max_keys=100):
        """List objects in a bucket under ``prefix``.

        Args:
            bucket_name: bucket to list.
            prefix: key prefix to filter by.
            max_keys: forwarded to the SDK as ``max_keys`` (default 100).

        Returns:
            ``(ok, resp)`` as described on the class.
        """
        resp = self.client.listObjects(bucket_name, prefix, max_keys=max_keys, encoding_type='url')
        return self._as_result(resp)
@@ -15,6 +15,20 @@ def is_all_english_chars(s):
15
15
 
16
16
 
17
17
def contains_chinese_chars(s):
    """
    Tell whether a string contains at least one Chinese (Han) character.

    Relies on the third-party ``regex`` library (not the built-in ``re``),
    matching a single character through the Unicode property ``IsHan``.

    Args:
        s (str): string to inspect

    Returns:
        bool: True when at least one Han character is present, False otherwise
    """
    han_hit = regex.search(r"[\p{IsHan}]", s)
    return han_hit is not None
19
33
 
20
34
 
@@ -108,56 +122,49 @@ def is_all_symbols(text):
108
122
  # 检查每个字符是否属于符号类别
109
123
  return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
110
124
 
111
-
112
- def is_whole_word_en(sub_str: str, long_str: str) -> bool:
125
def is_whole_word_en_re(organ: str, ele_organ: str) -> bool:
    """Regex-based check that ``organ`` occurs in ``ele_organ`` as a whole word.

    An occurrence counts when it is neither preceded nor followed by an ASCII
    letter (either case), a digit or a hyphen; the start and end of the string
    count as boundaries. ``organ`` itself is matched literally and
    case-sensitively. Intended to agree with ``is_whole_word_en``.

    Args:
        organ: the candidate word/phrase to look for.
        ele_organ: the text to search in.

    Returns:
        True when a properly bounded occurrence exists; False otherwise, and
        also when either argument is empty.
    """
    if not organ or not ele_organ:
        return False

    # Uppercase letters must count as word characters too: without "A-Z" a
    # neighbour such as "A" in "AbcD" is mistaken for a boundary. Lookarounds
    # keep the boundary characters out of the match itself.
    pattern = rf"(?<![A-Za-z0-9-]){re.escape(organ)}(?![A-Za-z0-9-])"
    return re.search(pattern, ele_organ) is not None
119
134
 
120
- 返回:
121
- True 表示 sub_str 是一个完整单词;False 表示是部分单词或不匹配。
122
- """
123
- # 用于 忽略大小写 进行匹配
124
- regex_pattern = re.compile(r"[^a-z0-9]", re.IGNORECASE) # 用于判断非字母数字字符
135
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """Check whether ``sub_str`` appears in ``long_str`` as a complete word.

    An occurrence is a complete word when the character right before it and
    the character right after it (where they exist) are not letters, digits
    or hyphens; letters are compared case-insensitively for this boundary
    test. The search for ``sub_str`` itself is literal and case-sensitive.

    Args:
        sub_str: the English word/phrase to look for.
        long_str: the text to search in.

    Returns:
        True when at least one properly bounded occurrence exists; False
        otherwise, and also when either argument is empty.
    """
    if not sub_str or not long_str:
        return False

    # The whole text being exactly the word is trivially a whole-word hit.
    if long_str == sub_str:
        return True

    boundary = re.compile(r"[^a-z0-9-]", re.IGNORECASE)
    total = len(long_str)
    width = len(sub_str)

    # Advance one character at a time so overlapping occurrences are also
    # examined (e.g. "a.a" inside "xa.a.a" only qualifies at index 3).
    begin = long_str.find(sub_str)
    while begin != -1:
        stop = begin + width
        clean_start = begin == 0 or bool(boundary.match(long_str[begin - 1]))
        clean_end = stop == total or bool(boundary.match(long_str[stop]))
        if clean_start and clean_end:
            return True
        begin = long_str.find(sub_str, begin + 1)

    return False
161
168
 
162
169
 
163
170
  def is_whole_word(sub_str: str, long_str: str) -> bool:
@@ -178,10 +185,10 @@ def is_whole_word(sub_str: str, long_str: str) -> bool:
178
185
  # 是否是字母数字
179
186
  if is_ascii_alnum(sub_str[0]) or is_ascii_alnum(sub_str[-1]):
180
187
  # 表示中英文混合 看是否是截断单词即可
181
- return is_whole_word_en(sub_str, long_str)
188
+ return is_whole_word_en_re(sub_str, long_str)
182
189
  else:
183
190
  # 中文子串只要被包含即可视为“完整词”
184
191
  return is_contain
185
192
  else:
186
193
  # 英文使用完整单词判断逻辑
187
- return is_whole_word_en(sub_str, long_str)
194
+ return is_whole_word_en_re(sub_str, long_str)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: re_common
3
- Version: 10.0.42
3
+ Version: 10.0.43
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.6
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ Dynamic: author
15
+ Dynamic: author-email
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-python
22
+ Dynamic: summary
14
23
 
15
24
 
16
25
  这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
@@ -177,8 +177,9 @@ re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py,sha256=cMN4W7xu
177
177
  re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py,sha256=Ri8Ul2_URq1TVvlXwG0OvqBo9_LSpivvdvjQM7xr01I,9947
178
178
  re_common/v2/baselibrary/helpers/search_packge/test.py,sha256=jYDa6s66jqiz6xEhXMPLqmONFbmfv-EgxaVpdHbGk4U,52
179
179
  re_common/v2/baselibrary/s3object/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
- re_common/v2/baselibrary/s3object/baseaioboto3.py,sha256=3WVgcBdohPJut-1MB2QCCuYKf9ynjohMKL3prMjHt94,1973
180
+ re_common/v2/baselibrary/s3object/baseaioboto3.py,sha256=Xqk1z0DyNM127EBH1sstftGSkRNkm6eKlbNRXby38rI,4383
181
181
  re_common/v2/baselibrary/s3object/baseboto3.py,sha256=mXuIFx99pnrPGQ4LJCZwlN1HLbaU-OWLwck0cVzW6hc,11203
182
+ re_common/v2/baselibrary/s3object/huaweiobs.py,sha256=rUXm7S5yZ4T4a_CK0ggsEX4k_Th2jZW2oygGb6xhtW8,2754
182
183
  re_common/v2/baselibrary/tools/WeChatRobot.py,sha256=sKBt2gPsfj0gzV6KaLSAhIhL-j3qNfHfqE-lII1LVwM,3537
183
184
  re_common/v2/baselibrary/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
185
  re_common/v2/baselibrary/tools/ac_ahocorasick.py,sha256=c63y5RtKVLD37nyPCnBqfNygwRj4gTQqyIdDOrC65G0,2847
@@ -218,7 +219,7 @@ re_common/v2/baselibrary/utils/json_cls.py,sha256=M93piYtmgm_wP8E57culTrd_AhHLoG
218
219
  re_common/v2/baselibrary/utils/mq.py,sha256=UHpO8iNIHs91Tgp-BgnSUpZwjWquxrGLdpr3FMMv2zw,2858
219
220
  re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
220
221
  re_common/v2/baselibrary/utils/pinyin_utils.py,sha256=OXZfVvMjKfCvEbJ6PIwpwWbupU1CSBXJNDnf3jMhC10,7141
221
- re_common/v2/baselibrary/utils/string_bool.py,sha256=no7fdxe2iYUaxQBRA8kkyRhmJGaxxQa3JHQ_5LZLyi8,6210
222
+ re_common/v2/baselibrary/utils/string_bool.py,sha256=FsmrUZw1CCUXrfC9ZejdgKO35320wWffiduAGhl_bFQ,6412
222
223
  re_common/v2/baselibrary/utils/string_clear.py,sha256=Ympa0Cs2y_72QeeyMS8de8y_QgtEFJJQ0AgHnylbMUc,7861
223
224
  re_common/v2/baselibrary/utils/string_smi.py,sha256=cU0WAWHRGnGoVQx3eCEKeM_q_olFNzRTJe7rSe586SY,741
224
225
  re_common/v2/baselibrary/utils/stringutils.py,sha256=hH0pHNvgR_TgulmBPRax9U_sp6bwYG5ksDbdqHRCFvk,10083
@@ -248,8 +249,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
248
249
  re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
249
250
  re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
250
251
  re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
251
- re_common-10.0.42.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
252
- re_common-10.0.42.dist-info/METADATA,sha256=OHWLXAG4NpAA8WfEywa-GJlin-KBZ6kQWCaICOrdyA0,582
253
- re_common-10.0.42.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
254
- re_common-10.0.42.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
255
- re_common-10.0.42.dist-info/RECORD,,
252
+ re_common-10.0.43.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
253
+ re_common-10.0.43.dist-info/METADATA,sha256=wLSvL6sw37eaQmsTV3OYThFy_oqzN4Bx1qcXw36SbNQ,787
254
+ re_common-10.0.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
255
+ re_common-10.0.43.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
256
+ re_common-10.0.43.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5