re-common 10.0.41__py3-none-any.whl → 10.0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/v2/baselibrary/s3object/baseaioboto3.py +122 -0
- re_common/v2/baselibrary/s3object/huaweiobs.py +80 -0
- re_common/v2/baselibrary/tools/data_processer/base.py +2 -2
- re_common/v2/baselibrary/tools/data_processer/data_reader.py +9 -6
- re_common/v2/baselibrary/tools/dir_file_tools.py +27 -0
- re_common/v2/baselibrary/tools/list_tools.py +18 -1
- re_common/v2/baselibrary/utils/api_net_utils.py +49 -21
- re_common/v2/baselibrary/utils/pinyin_utils.py +178 -0
- re_common/v2/baselibrary/utils/string_bool.py +48 -41
- {re_common-10.0.41.dist-info → re_common-10.0.43.dist-info}/METADATA +11 -2
- {re_common-10.0.41.dist-info → re_common-10.0.43.dist-info}/RECORD +14 -10
- {re_common-10.0.41.dist-info → re_common-10.0.43.dist-info}/WHEEL +1 -1
- {re_common-10.0.41.dist-info → re_common-10.0.43.dist-info/licenses}/LICENSE +0 -0
- {re_common-10.0.41.dist-info → re_common-10.0.43.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import gzip
|
|
2
|
+
import io
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
import aioboto3
|
|
6
|
+
import aiofiles
|
|
7
|
+
from aiobotocore.config import AioConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AsyncReadable(Protocol):
    """Structural type for any object that exposes an awaitable ``read``."""

    async def read(self, n: int = -1) -> bytes:
        """Read and return bytes; ``n`` defaults to ``-1`` (meaning left to implementers)."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# config = AioConfig(connect_timeout=600000, read_timeout=600000, retries={'max_attempts': 3},
|
|
16
|
+
# max_pool_connections=10)
|
|
17
|
+
|
|
18
|
+
class BaseAioBoto3(object):
    """Async helper around an ``aioboto3`` session for an S3-compatible endpoint."""

    def __init__(self, aws_access_key_id, aws_secret_access_key, endpoint_url,
                 config=AioConfig(max_pool_connections=10)):
        """
        Store credentials and endpoint; the session itself is created lazily.

        Args:
            aws_access_key_id: access key handed to ``aioboto3.Session``.
            aws_secret_access_key: secret key handed to ``aioboto3.Session``.
            endpoint_url: endpoint URL passed to every ``client("s3", ...)`` call.
            config: client configuration passed to every ``client("s3", ...)``
                call. The default instance is built once, when the class is
                defined, and is shared by all instances that omit the argument.
        """
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.endpoint_url = endpoint_url
        self.config = config
        self.boto_session = None

    async def initialize_class_variable(self):
        """Create the ``aioboto3.Session`` on first use; later calls do nothing."""
        if self.boto_session is None:
            self.boto_session = aioboto3.Session(
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key
            )

    async def read_minio_data(self, bucket, key):
        """Fetch ``key`` from ``bucket`` and return the whole body as bytes."""
        await self.initialize_class_variable()
        async with self.boto_session.client("s3", endpoint_url=self.endpoint_url, config=self.config) as s3:
            s3_ob = await s3.get_object(Bucket=bucket, Key=key)
            result = await s3_ob["Body"].read()
            return result

    def ungzip(self, raw_bytes, encoding="utf-8"):
        """Decompress gzip ``raw_bytes`` and decode the result with ``encoding``."""
        with gzip.GzipFile(fileobj=io.BytesIO(raw_bytes)) as gz:
            return gz.read().decode(encoding)

    # Asynchronous download intended for large objects.
    async def download_file(self, bucket: str, key: str, local_path: str):
        """
        Stream ``key`` from ``bucket`` into ``local_path`` in 10 MB chunks.

        :return: ``local_path``
        """
        await self.initialize_class_variable()
        async with self.boto_session.client("s3", endpoint_url=self.endpoint_url, config=self.config) as s3:
            response = await s3.get_object(Bucket=bucket, Key=key)
            body = response["Body"]

            # Write to the local file asynchronously, one chunk at a time.
            async with aiofiles.open(local_path, "wb") as f:
                while True:
                    chunk = await body.read(10 * 1024 * 1024)  # 10 MB per read
                    if not chunk:
                        break
                    await f.write(chunk)

        return local_path

    async def list_files(self, bucket: str, prefix: str, recursive: bool = True):
        """
        List the object keys under a "directory" (prefix) of a bucket.

        Example result for a directory holding one file:
        ['server_data/api-title-roc/py_full_organ_dic/',
         'server_data/api-title-roc/py_full_organ_dic/part-00000.gz']

        :param bucket: bucket name
        :param prefix: directory prefix, e.g. 'server_data/api-title-roc/'
        :param recursive: when False a "/" delimiter is sent; only the keys in
            ``Contents`` are collected, ``CommonPrefixes`` are not returned
        :return: List[str] of object keys
        :raises RuntimeError: if a page is flagged as truncated but carries no
            continuation token
        """
        await self.initialize_class_variable()
        keys = []

        # Non-recursive listing: use a delimiter to emulate directories.
        extra_args = {}
        if not recursive:
            extra_args["Delimiter"] = "/"

        async with self.boto_session.client(
                "s3",
                endpoint_url=self.endpoint_url,
                config=self.config
        ) as s3:

            continuation_token = None

            while True:
                kwargs = {
                    "Bucket": bucket,
                    "Prefix": prefix,
                    **extra_args
                }
                # Cursor for the next page.
                if continuation_token:
                    kwargs["ContinuationToken"] = continuation_token

                resp = await s3.list_objects_v2(**kwargs)

                # Objects on this page.
                for obj in resp.get("Contents", []):
                    keys.append(obj["Key"])

                if not resp.get("IsTruncated"):
                    break

                # More pages follow: remember where to resume.
                continuation_token = resp.get("NextContinuationToken")
                if not continuation_token:
                    # Without a token the next request would restart at page one
                    # and the loop would append the same keys forever.
                    raise RuntimeError(
                        "list_objects_v2 returned IsTruncated without NextContinuationToken"
                    )

        return keys
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# pip install pycryptodome==3.10.1
|
|
2
|
+
# pip install esdk-obs-python
|
|
3
|
+
# 引入模块
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from obs import ObsClient, GetObjectHeader
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BaseObsClient(object):
    """Thin synchronous wrapper around the Huawei OBS SDK ``ObsClient``."""

    def __init__(self, aws_access_key_id="", aws_secret_access_key="", endpoint_url=""):
        """Store the credentials; build the client at once only if all three are non-empty."""
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.endpoint_url = endpoint_url
        self.client = None
        if self.aws_access_key_id and self.aws_secret_access_key and self.endpoint_url:
            self.get_client()

    def get_client(self):
        """(Re)create ``self.client`` from the stored credentials and return ``self``."""
        self.client = ObsClient(access_key_id=self.aws_access_key_id,
                                secret_access_key=self.aws_secret_access_key,
                                server=self.endpoint_url)
        return self

    def close(self):
        """Close the underlying client; a no-op when no client was ever created."""
        if self.client is not None:
            self.client.close()

    @staticmethod
    def _with_status(resp):
        """Pair ``resp`` with a success flag: a status below 300 counts as success."""
        return resp.status < 300, resp

    def put_object(self, bucket_name, objectKey, body):
        """
        Write ``body`` directly as the content of an object.

        Args:
            bucket_name: target bucket.
            objectKey: key of the object to write.
            body: content handed to ``putContent``.

        Returns:
            ``(ok, resp)`` where ``ok`` is True when ``resp.status`` < 300.
        """
        # Upload the content as an object.
        resp = self.client.putContent(bucket_name, objectKey, body)
        return self._with_status(resp)

    def download_memobj(self, bucket_name, objectKey):
        """
        Download an object with ``loadStreamInMemory=True``.

        Returns:
            ``(ok, resp)`` where ``ok`` is True when ``resp.status`` < 300.
        """
        # loadStreamInMemory=True ignores any download path and loads the
        # object's binary stream into memory.
        resp = self.client.getObject(bucketName=bucket_name, objectKey=objectKey, loadStreamInMemory=True)
        return self._with_status(resp)

    def download_file(self, bucket_name, objectKey, downloadPath):
        """
        Download an object to ``downloadPath``.

        Returns:
            ``(ok, resp)`` where ``ok`` is True when ``resp.status`` < 300.
        """
        headers = GetObjectHeader()
        resp = self.client.getObject(bucket_name, objectKey, downloadPath, headers=headers)
        return self._with_status(resp)

    def list_prefixes(self, bucket_name, prefix, max_keys=100):
        """
        List objects in ``bucket_name`` under ``prefix`` (one ``listObjects`` call).

        Returns:
            ``(ok, resp)`` where ``ok`` is True when ``resp.status`` < 300.
        """
        # List the objects in the bucket.
        resp = self.client.listObjects(bucket_name, prefix, max_keys=max_keys, encoding_type='url')
        return self._with_status(resp)
|
|
@@ -4,9 +4,9 @@ from typing import List, Generator
|
|
|
4
4
|
|
|
5
5
|
class BaseFileReader(ABC):
|
|
6
6
|
|
|
7
|
-
def __init__(self, batch_size: int = 10000):
|
|
7
|
+
def __init__(self, batch_size: int = 10000, read_model: int = 1):
|
|
8
8
|
self.batch_size = batch_size
|
|
9
|
-
self.read_model =
|
|
9
|
+
self.read_model = read_model
|
|
10
10
|
|
|
11
11
|
@abstractmethod
|
|
12
12
|
def list_files(self, path: str) -> List[str]:
|
|
@@ -12,8 +12,9 @@ from re_common.v2.baselibrary.tools.data_processer.base import BaseFileReader
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class HDFSFileReader(BaseFileReader):
|
|
15
|
-
def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
16
|
-
|
|
15
|
+
def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
16
|
+
hdfs_user: str = "root"):
|
|
17
|
+
super().__init__(batch_size, read_model)
|
|
17
18
|
self.client = InsecureClient(hdfs_url, user=hdfs_user)
|
|
18
19
|
|
|
19
20
|
def list_files(self, path: str) -> List[str]:
|
|
@@ -48,8 +49,9 @@ class HDFSFileReader(BaseFileReader):
|
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
class HDFSGZFileReader(BaseFileReader):
|
|
51
|
-
def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
52
|
-
|
|
52
|
+
def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
53
|
+
hdfs_user: str = "root"):
|
|
54
|
+
super().__init__(batch_size, read_model)
|
|
53
55
|
self.hdfs_url = hdfs_url
|
|
54
56
|
self.hdfs_user = hdfs_user
|
|
55
57
|
self.client = None
|
|
@@ -99,8 +101,9 @@ class HDFSGZFileReader(BaseFileReader):
|
|
|
99
101
|
|
|
100
102
|
|
|
101
103
|
class HDFSParquetFileReader(BaseFileReader):
|
|
102
|
-
def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
103
|
-
|
|
104
|
+
def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
105
|
+
hdfs_user: str = "root"):
|
|
106
|
+
super().__init__(batch_size, read_model)
|
|
104
107
|
self.client = InsecureClient(hdfs_url, user=hdfs_user)
|
|
105
108
|
|
|
106
109
|
def list_files(self, path: str) -> List[str]:
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def scan_dir_fast(path):
    """
    Return one ``{"path": ..., "size": ...}`` dict per file directly inside ``path``.

    Only entries for which ``DirEntry.is_file()`` is true are reported;
    subdirectories are not descended into.
    """
    with os.scandir(path) as entries:
        return [
            {"path": entry.path, "size": entry.stat().st_size}
            for entry in entries
            if entry.is_file()
        ]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def scan_dir(dir_name, result_file):
    """
    Walk ``dir_name`` and append one JSON line per file to ``result_file``.

    Example arguments: dir_name=r"/share/fulltext/errors",
    result_file="file_info_errors.txt". Each visited directory is printed, and
    the result file is reopened in append mode once per directory.
    """
    for root, _dirs, _files in os.walk(dir_name):
        print(root)
        records = scan_dir_fast(root)
        with open(result_file, "a", encoding="utf-8") as out:
            out.writelines(
                json.dumps(record, ensure_ascii=False) + "\n"
                for record in records
                if record
            )
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import itertools
|
|
2
|
+
from collections import Counter
|
|
2
3
|
from typing import List, Any, Tuple
|
|
3
4
|
|
|
4
5
|
|
|
@@ -67,4 +68,20 @@ def list_to_dict(list_data,key_name):
|
|
|
67
68
|
|
|
68
69
|
def split_list_by_step(lst, step=100):
    """Split a flat list into consecutive sub-lists of at most ``step`` items."""
    chunks = []
    for offset in range(0, len(lst), step):
        chunks.append(lst[offset:offset + step])
    return chunks
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def list_diff(l1, l2):
    """
    Compare two lists as multisets (duplicates are counted, not collapsed).

    ``Counter`` overloads ``&`` (minimum of counts) and ``-`` (positive
    difference of counts), which yields the duplicate-aware pieces directly.

    Returns a tuple ``(common, extra1, extra2)``: the shared elements, those
    left over in ``l1`` and those left over in ``l2``.
    """
    left, right = Counter(l1), Counter(l2)
    shared = left & right        # elements present in both
    only_left = left - right     # surplus of l1
    only_right = right - left    # surplus of l2
    return (
        list(shared.elements()),
        list(only_left.elements()),
        list(only_right.elements()),
    )
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import atexit
|
|
2
|
+
import os
|
|
2
3
|
import sys
|
|
3
4
|
import asyncio
|
|
5
|
+
import traceback
|
|
6
|
+
|
|
4
7
|
import aiohttp
|
|
5
|
-
from typing import Optional
|
|
8
|
+
from typing import Optional, Union
|
|
6
9
|
|
|
7
10
|
from tenacity import retry, stop_after_attempt, wait_random
|
|
8
11
|
|
|
@@ -62,6 +65,13 @@ def on_retry_error(retry_state):
|
|
|
62
65
|
|
|
63
66
|
def on_retry(retry_state):
|
|
64
67
|
# 每次抛错进入该函数打印消息
|
|
68
|
+
|
|
69
|
+
# # 获取函数调用参数
|
|
70
|
+
# args = retry_state.args
|
|
71
|
+
# kwargs = retry_state.kwargs
|
|
72
|
+
#
|
|
73
|
+
# print(id(args[0]._get_session()))
|
|
74
|
+
|
|
65
75
|
print(
|
|
66
76
|
f"[HTTP 请求重试]"
|
|
67
77
|
f"当前重试 : 第 {retry_state.attempt_number} 次"
|
|
@@ -84,6 +94,8 @@ class ApiNetUtils:
|
|
|
84
94
|
_conn: Optional[aiohttp.TCPConnector] = None
|
|
85
95
|
_session: Optional[aiohttp.ClientSession] = None
|
|
86
96
|
_close_registered: bool = False # 确保清理函数只注册一次
|
|
97
|
+
_pid: Optional[int] = None # 当前进程的 PID
|
|
98
|
+
lock = asyncio.Lock()
|
|
87
99
|
|
|
88
100
|
@classmethod
|
|
89
101
|
async def _get_connector(cls) -> aiohttp.TCPConnector:
|
|
@@ -96,9 +108,9 @@ class ApiNetUtils:
|
|
|
96
108
|
cls._conn = aiohttp.TCPConnector(
|
|
97
109
|
limit=50, # 最大连接数
|
|
98
110
|
ssl=False, # 禁用SSL验证(按需开启)
|
|
99
|
-
force_close=
|
|
100
|
-
|
|
101
|
-
|
|
111
|
+
force_close=False, # 保持连接活跃
|
|
112
|
+
enable_cleanup_closed=True, # 自动清理关闭的连接 #
|
|
113
|
+
keepalive_timeout=4.99 # 比服务器的5s 小一点
|
|
102
114
|
)
|
|
103
115
|
return cls._conn
|
|
104
116
|
|
|
@@ -108,25 +120,41 @@ class ApiNetUtils:
|
|
|
108
120
|
获取共享会话(线程安全的延迟初始化)
|
|
109
121
|
包含自动注册清理机制
|
|
110
122
|
"""
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
123
|
+
async with cls.lock:
|
|
124
|
+
current_pid = os.getpid()
|
|
125
|
+
if cls._pid != current_pid:
|
|
126
|
+
# 新进程,重新初始化
|
|
127
|
+
if cls._session:
|
|
128
|
+
await cls.close()
|
|
129
|
+
cls._pid = current_pid
|
|
130
|
+
|
|
131
|
+
if cls._session is None or cls._session.closed or cls.is_loop_closed(cls._session):
|
|
132
|
+
if cls._session:
|
|
133
|
+
await cls.close()
|
|
134
|
+
# 获取连接器(会自动初始化)
|
|
135
|
+
connector = await cls._get_connector()
|
|
136
|
+
|
|
137
|
+
# 强制获取新的事件循环
|
|
138
|
+
loop = asyncio.get_event_loop()
|
|
139
|
+
|
|
140
|
+
timeout = aiohttp.ClientTimeout(
|
|
141
|
+
total=120, # 整个请求最多 30 秒
|
|
142
|
+
connect=10, # 最多 5 秒连接
|
|
143
|
+
sock_connect=10,
|
|
144
|
+
sock_read=110, # 最多 20 秒读取响应数据
|
|
145
|
+
)
|
|
119
146
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
147
|
+
# 创建新会话
|
|
148
|
+
cls._session = aiohttp.ClientSession(
|
|
149
|
+
connector=connector,
|
|
150
|
+
timeout=timeout, # 默认30秒超时
|
|
151
|
+
loop=loop,
|
|
152
|
+
) # 显式指定事件循环
|
|
125
153
|
|
|
126
|
-
|
|
127
|
-
|
|
154
|
+
# # 注册退出时的清理钩子
|
|
155
|
+
cls._register_cleanup()
|
|
128
156
|
|
|
129
|
-
|
|
157
|
+
return cls._session
|
|
130
158
|
|
|
131
159
|
@staticmethod
|
|
132
160
|
def is_loop_closed(session: aiohttp.ClientSession) -> bool:
|
|
@@ -135,7 +163,7 @@ class ApiNetUtils:
|
|
|
135
163
|
"""
|
|
136
164
|
loop = session._loop # 获取会话绑定的事件循环
|
|
137
165
|
if loop.is_closed():
|
|
138
|
-
|
|
166
|
+
print("Event loop is closed")
|
|
139
167
|
return True
|
|
140
168
|
# print("Event loop not is closed")
|
|
141
169
|
return False
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from typing import List, Any
|
|
3
|
+
|
|
4
|
+
from pypinyin import pinyin, Style
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PinyinUtils:
    """Static helpers that turn Chinese text (mainly personal names) into pinyin."""

    @staticmethod
    def get_pinyin_or_char(text: str) -> str:
        """Replace each Chinese character by its first reading; keep other characters as-is."""
        result = []
        for char in text:
            if PinyinUtils.is_chinese(char):
                py_one = pinyin(char, style=Style.NORMAL, v_to_u=True, heteronym=True)
                result.append(py_one[0][0] if py_one else char)
            else:
                result.append(char)
        return "".join(result)

    @staticmethod
    def get_pinyin_or_char_ex(text: str) -> str:
        """Like ``get_pinyin_or_char`` but joins all readings of a character with ``|``."""
        result = []
        for char in text:
            if PinyinUtils.is_chinese(char):
                py_one = pinyin(char, style=Style.NORMAL, v_to_u=True, heteronym=True)
                result.append("|".join(py_one[0]) if py_one else char)
            else:
                result.append(char)
        return "".join(result)

    @staticmethod
    def is_chinese(char: str) -> bool:
        """Return True when ``char`` lies in the range U+4E00..U+9FFF."""
        return "\u4e00" <= char <= "\u9fff"

    @staticmethod
    def combine(lists: List[List[Any]]) -> list:
        """
        Return the Cartesian product of ``lists`` as space-joined strings.

        Example: [[1, 2], ['a', 'b']] -> ['1 a', '1 b', '2 a', '2 b'].
        """
        return [" ".join(str(item) for item in combo) for combo in itertools.product(*lists)]

    @staticmethod
    def to_pinyin_names(name: str) -> List[str]:
        """
        Convert a Chinese name into every possible pinyin combination.

        Args:
            name: the Chinese name.

        Returns:
            All combinations, one space-separated string each. A reading that
            contains "ü" contributes both a "u" and a "v" spelling.
        """
        if not name:
            return []

        pinyin_options = []

        for char in name:
            py_one = PinyinUtils.get_pinyin_or_char_ex(char)

            if "ü" in py_one:
                # "ü" is commonly typed as either "u" or "v": emit both spellings.
                expanded = []
                for replacement in ["u", "v"]:
                    expanded.extend(py_one.replace("ü", replacement).split("|"))
                # Order-preserving de-duplication keeps the result stable across
                # runs (a plain set() is subject to string hash randomisation).
                variants = list(dict.fromkeys(expanded))
            else:
                variants = py_one.split("|")

            pinyin_options.append(variants)

        return PinyinUtils.combine(pinyin_options)

    @staticmethod
    def to_pinyin_name(name: str) -> str:
        """
        Convert a Chinese name into one pinyin string.

        Args:
            name: the Chinese string.

        Returns:
            The first reading of every character, separated by spaces, with
            "ü" written as "v".
        """
        result = []
        for char in name:
            py_one = PinyinUtils.get_pinyin_or_char(char).replace("ü", "v")
            result.append(py_one)
        return " ".join(result)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Toneless pinyin syllables accepted when segmenting a pinyin string.
# "ü" is spelled "v" after n/l ("nv", "lv") and "u" in "nue"/"lue".
VALID_PINYINS = {
    "a", "o", "e", "ai", "ei", "ao", "ou", "an", "en", "ang", "eng", "er",
    "ba", "bai", "ban", "bang", "bao", "bei", "ben", "beng", "bi", "bian", "biao", "bie", "bin", "bing", "bo", "bu",
    "pa", "pai", "pan", "pang", "pao", "pei", "pen", "peng", "pi", "pian", "piao", "pie", "pin", "ping", "po", "pu",
    "ma", "mai", "man", "mang", "mao", "mei", "men", "meng", "mi", "mian", "miao", "mie", "min", "ming", "mo",
    "mou",
    "mu",
    "fa", "fan", "fang", "fei", "fen", "feng", "fo", "fou", "fu",
    "da", "dai", "dan", "dang", "dao", "dei", "den", "deng", "di", "dian", "diao", "die", "ding", "diu", "dong",
    "dou",
    "du", "duan", "dui", "dun", "duo",
    "ta", "tai", "tan", "tang", "tao", "tei", "teng", "ti", "tian", "tiao", "tie", "ting", "tong", "tou", "tu",
    "tuan",
    "tui", "tun", "tuo",
    "na", "nai", "nan", "nang", "nao", "nei", "nen", "neng", "ni", "nian", "niang", "niao", "nie", "nin", "ning",
    "niu",
    "nong", "nou", "nu", "nuan", "nue", "nv",
    "la", "lai", "lan", "lang", "lao", "lei", "leng", "li", "lia", "lian", "liang", "liao", "lie", "lin", "ling",
    "liu",
    "lo", "long", "lou", "lu", "luan", "lue", "lv",
    "ga", "gai", "gan", "gang", "gao", "ge", "gei", "gen", "geng", "gong", "gou", "gu", "gua", "guai", "guan",
    "guang",
    "gui", "gun", "guo",
    "ka", "kai", "kan", "kang", "kao", "ke", "ken", "keng", "kong", "kou", "ku", "kua", "kuai", "kuan", "kuang",
    "kui",
    "kun", "kuo",
    "ha", "hai", "han", "hang", "hao", "he", "hei", "hen", "heng", "hong", "hou", "hu", "hua", "huai", "huan",
    "huang",
    "hui", "hun", "huo",
    "ji", "jia", "jian", "jiang", "jiao", "jie", "jin", "jing", "jiong", "jiu", "ju", "juan", "jue", "jun",
    "qi", "qia", "qian", "qiang", "qiao", "qie", "qin", "qing", "qiong", "qiu", "qu", "quan", "que", "qun",
    "xi", "xia", "xian", "xiang", "xiao", "xie", "xin", "xing", "xiong", "xiu", "xu", "xuan", "xue", "xun",
    "zha", "zhai", "zhan", "zhang", "zhao", "zhe", "zhei", "zhen", "zheng", "zhong", "zhou", "zhu", "zhua", "zhuai",
    "zhuan", "zhuang", "zhui", "zhun", "zhuo",
    "cha", "chai", "chan", "chang", "chao", "che", "chen", "cheng", "chi", "chong", "chou", "chu", "chua", "chuai",
    "chuan", "chuang", "chui", "chun", "chuo",
    "sha", "shai", "shan", "shang", "shao", "she", "shei", "shen", "sheng", "shi", "shou", "shu", "shua", "shuai",
    "shuan", "shuang", "shui", "shun", "shuo",
    "ra", "ran", "rang", "rao", "re", "ren", "reng", "ri", "rong", "rou", "ru", "rua", "ruan", "rui", "run", "ruo",
    "za", "zai", "zan", "zang", "zao", "ze", "zei", "zen", "zeng", "zong", "zou", "zu", "zuan", "zui", "zun", "zuo",
    "ca", "cai", "can", "cang", "cao", "ce", "cen", "ceng", "cong", "cou", "cu", "cuan", "cui", "cun", "cuo",
    "sa", "sai", "san", "sang", "sao", "se", "sen", "seng", "song", "sou", "su", "suan", "sui", "sun", "suo",
    "ya", "yan", "yang", "yao", "ye", "yi", "yin", "ying", "yo", "yong", "you", "yu", "yuan", "yue", "yun",
    "wa", "wai", "wan", "wang", "wei", "wen", "weng", "wo", "wu",
    # Syllables that were missing from the table above, so that strings
    # containing them (e.g. "luo", "zhi", "de") can be segmented.
    "pou", "me", "miu", "de", "dia", "te", "ne", "nuo", "le", "lun", "luo", "kei",
    "zhi", "zi", "ci", "si",
    # "v" spellings of "lüe"/"nüe", matching the "lv"/"nv" entries.
    "lve", "nve",
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def split_pinyin_all(s):
    """
    Return every way to cut ``s`` (lower-cased) into syllables from VALID_PINYINS.

    Each result is a list of syllables covering the whole string; an input that
    cannot be fully segmented yields an empty list. Results are produced in
    depth-first order, shortest next syllable first.
    """
    s = s.lower()
    total = len(s)
    results = []

    # Explicit stack instead of recursion: (position reached, syllables so far).
    pending = [(0, [])]
    while pending:
        start, path = pending.pop()
        if start == total:
            results.append(path)
            continue
        # No syllable in the table is longer than six letters.
        furthest = min(total, start + 6)
        # Push the longest candidate first so the shortest is popped first.
        for end in range(furthest, start, -1):
            part = s[start:end]
            if part in VALID_PINYINS:
                pending.append((end, path + [part]))

    return results
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def is_pinyin(word):
    """
    Report whether ``word`` survives a round trip through ``pinyin()`` unchanged.

    The word is converted with ``Style.NORMAL``, the first reading of each item
    is concatenated, and the result is compared to ``word`` ignoring case. Any
    ordinary exception raised along the way yields False.

    NOTE(review): pypinyin appears to pass non-Chinese text through unchanged
    by default, in which case any ASCII-only word would return True - confirm
    this is the intended notion of "valid pinyin".
    """
    try:
        # If the input already is pinyin, converting it should give it back.
        pinyin_list = pinyin(word, style=Style.NORMAL)
        reconstructed = "".join(p[0] for p in pinyin_list)
        # Case-insensitive comparison of the round-tripped text.
        return word.lower() == reconstructed.lower()
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt/SystemExit propagate.
        return False
|
|
172
|
+
|
|
173
|
+
def split_pinyin(word):
    """
    Return each segmentation of ``word`` as a space-joined string.

    Falls back to ``[word]`` when ``split_pinyin_all`` finds no segmentation.
    """
    joined = [" ".join(parts) for parts in split_pinyin_all(word)]
    return joined if joined else [word]
|
|
@@ -15,6 +15,20 @@ def is_all_english_chars(s):
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def contains_chinese_chars(s):
    """
    Tell whether a string contains at least one Chinese (Han) character.

    Uses the third-party ``regex`` library (not the built-in ``re``) and its
    Unicode property ``IsHan`` to match any single Han character.

    Args:
        s (str): the string to inspect.

    Returns:
        bool: True if at least one Han character is present, otherwise False.
    """
    han_match = regex.search(r"[\p{IsHan}]", s)
    return han_match is not None
|
|
19
33
|
|
|
20
34
|
|
|
@@ -108,56 +122,49 @@ def is_all_symbols(text):
|
|
|
108
122
|
# 检查每个字符是否属于符号类别
|
|
109
123
|
return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
|
|
110
124
|
|
|
111
|
-
|
|
112
|
-
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
|
|
125
|
+
def is_whole_word_en_re(organ: str, ele_organ: str) -> bool:
    """
    Regex counterpart of ``is_whole_word_en``.

    Returns True when ``organ`` occurs in ``ele_organ`` with each side being
    either the string edge or a character other than an ASCII letter, digit or
    hyphen. Empty arguments return False.

    The boundary class lists both letter cases so that an uppercase neighbour
    (e.g. the "X" in "Xabc") is not mistaken for a word boundary; ``organ``
    itself is still matched case-sensitively.
    """
    if not organ or not ele_organ:
        return False

    pattern = rf"(^|[^a-zA-Z0-9-]){re.escape(organ)}([^a-zA-Z0-9-]|$)"
    return re.search(pattern, ele_organ) is not None
|
|
119
134
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
"""
|
|
123
|
-
# 用于 忽略大小写 进行匹配
|
|
124
|
-
regex_pattern = re.compile(r"[^a-z0-9]", re.IGNORECASE) # 用于判断非字母数字字符
|
|
135
|
+
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """
    Check whether ``sub_str`` occurs in ``long_str`` as a whole word.

    An occurrence counts when each side is either the string edge or a
    character other than an ASCII letter (either case), digit or hyphen.
    Empty arguments return False.

    Every start position is examined, including occurrences that overlap an
    earlier rejected one (e.g. "a a" inside "xa a a").
    """
    if not sub_str or not long_str:
        return False

    # Matches a single character that is NOT part of a word.
    boundary = re.compile(r"[^a-z0-9-]", re.IGNORECASE)
    sub_len = len(sub_str)
    total = len(long_str)

    index = long_str.find(sub_str)
    while index != -1:
        end = index + sub_len
        starts_clean = index == 0 or boundary.match(long_str[index - 1]) is not None
        ends_clean = end == total or boundary.match(long_str[end]) is not None
        if starts_clean and ends_clean:
            return True
        # Step by one character so overlapping occurrences are not skipped.
        index = long_str.find(sub_str, index + 1)

    return False
|
|
161
168
|
|
|
162
169
|
|
|
163
170
|
def is_whole_word(sub_str: str, long_str: str) -> bool:
|
|
@@ -178,10 +185,10 @@ def is_whole_word(sub_str: str, long_str: str) -> bool:
|
|
|
178
185
|
# 是否是字母数字
|
|
179
186
|
if is_ascii_alnum(sub_str[0]) or is_ascii_alnum(sub_str[-1]):
|
|
180
187
|
# 表示中英文混合 看是否是截断单词即可
|
|
181
|
-
return
|
|
188
|
+
return is_whole_word_en_re(sub_str, long_str)
|
|
182
189
|
else:
|
|
183
190
|
# 中文子串只要被包含即可视为“完整词”
|
|
184
191
|
return is_contain
|
|
185
192
|
else:
|
|
186
193
|
# 英文使用完整单词判断逻辑
|
|
187
|
-
return
|
|
194
|
+
return is_whole_word_en_re(sub_str, long_str)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: re_common
|
|
3
|
-
Version: 10.0.
|
|
3
|
+
Version: 10.0.43
|
|
4
4
|
Summary: a library about all python projects
|
|
5
5
|
Home-page: https://gitee.com/xujiangios/re-common
|
|
6
6
|
Author: vic
|
|
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.6
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: author-email
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
14
23
|
|
|
15
24
|
|
|
16
25
|
这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
|
|
@@ -177,33 +177,36 @@ re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py,sha256=cMN4W7xu
|
|
|
177
177
|
re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py,sha256=Ri8Ul2_URq1TVvlXwG0OvqBo9_LSpivvdvjQM7xr01I,9947
|
|
178
178
|
re_common/v2/baselibrary/helpers/search_packge/test.py,sha256=jYDa6s66jqiz6xEhXMPLqmONFbmfv-EgxaVpdHbGk4U,52
|
|
179
179
|
re_common/v2/baselibrary/s3object/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
|
+
re_common/v2/baselibrary/s3object/baseaioboto3.py,sha256=Xqk1z0DyNM127EBH1sstftGSkRNkm6eKlbNRXby38rI,4383
|
|
180
181
|
re_common/v2/baselibrary/s3object/baseboto3.py,sha256=mXuIFx99pnrPGQ4LJCZwlN1HLbaU-OWLwck0cVzW6hc,11203
|
|
182
|
+
re_common/v2/baselibrary/s3object/huaweiobs.py,sha256=rUXm7S5yZ4T4a_CK0ggsEX4k_Th2jZW2oygGb6xhtW8,2754
|
|
181
183
|
re_common/v2/baselibrary/tools/WeChatRobot.py,sha256=sKBt2gPsfj0gzV6KaLSAhIhL-j3qNfHfqE-lII1LVwM,3537
|
|
182
184
|
re_common/v2/baselibrary/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
185
|
re_common/v2/baselibrary/tools/ac_ahocorasick.py,sha256=c63y5RtKVLD37nyPCnBqfNygwRj4gTQqyIdDOrC65G0,2847
|
|
184
186
|
re_common/v2/baselibrary/tools/concurrency.py,sha256=ctKBoeEbq1SGmhPp7oVR_QSXTKVAGLMByAHQKI6dAqU,981
|
|
185
187
|
re_common/v2/baselibrary/tools/dict_tools.py,sha256=eSMwPTLp3oSjuviC_wlXg0I-dnkkmZfUfCRLX5djWV8,1365
|
|
188
|
+
re_common/v2/baselibrary/tools/dir_file_tools.py,sha256=0ZohO8ZCLjuwx1zh4fY1Zcxv9lBAGJamH_B3ZCFO5AI,801
|
|
186
189
|
re_common/v2/baselibrary/tools/dolphinscheduler.py,sha256=1m7UGYDiuvJUCI6ik6CGM2fO8U5XteJzn55VRbwB9ts,7978
|
|
187
190
|
re_common/v2/baselibrary/tools/hdfs_base_processor.py,sha256=PFTyhth9RBoMTxsOzUxpyEfQegRi_Sd-C0lvIEOCxCg,8746
|
|
188
191
|
re_common/v2/baselibrary/tools/hdfs_bulk_processor.py,sha256=8FjuZbcBXC_27zEQonLKvb0fMGwcIH9MPRTOmdMAuLU,2396
|
|
189
192
|
re_common/v2/baselibrary/tools/hdfs_data_processer.py,sha256=g0DaNjXM1hIUblFQ6YBwnwEBKIXn48X8Y9Eiok4dVlQ,14824
|
|
190
193
|
re_common/v2/baselibrary/tools/hdfs_line_processor.py,sha256=h1J_mOPoNvsjw7zYMsD7rr0Q6bXvVzo9tRJZVAbei1s,2732
|
|
191
|
-
re_common/v2/baselibrary/tools/list_tools.py,sha256=
|
|
194
|
+
re_common/v2/baselibrary/tools/list_tools.py,sha256=nPhOTsGAPAyNISZwTyrF07Mvgs5jrU4vn8H9LWZiuQs,2630
|
|
192
195
|
re_common/v2/baselibrary/tools/resume_tracker.py,sha256=rVvkd3-eF5lX0VesIHc3YjVRE6bsgAd1VT0G8Uq5DVM,3252
|
|
193
196
|
re_common/v2/baselibrary/tools/search_hash_tools.py,sha256=2ENLtZE8opRsfkwRtTNMzITmpTsjO7wZ1ZkfkqpOH9U,1937
|
|
194
197
|
re_common/v2/baselibrary/tools/text_matcher.py,sha256=cPMoFxaA0-ce3tLRxVSs8_3pTYS1oVIHDnNy_AlPU-4,10756
|
|
195
198
|
re_common/v2/baselibrary/tools/unionfind_tools.py,sha256=VYHZZPXwBYljsm7TjV1B6iCgDn3O3btzNf9hMvQySVU,2965
|
|
196
199
|
re_common/v2/baselibrary/tools/data_processer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
197
|
-
re_common/v2/baselibrary/tools/data_processer/base.py,sha256=
|
|
200
|
+
re_common/v2/baselibrary/tools/data_processer/base.py,sha256=1cVDcznux06SlLKINyjjN7yYn3ugH7XirXe_bFjdah0,1734
|
|
198
201
|
re_common/v2/baselibrary/tools/data_processer/data_processer.py,sha256=NEJDBs7M6F9CisDDPt8Eop1Sh2tCarKj35W6oOXz9js,22247
|
|
199
|
-
re_common/v2/baselibrary/tools/data_processer/data_reader.py,sha256=
|
|
202
|
+
re_common/v2/baselibrary/tools/data_processer/data_reader.py,sha256=nYXh1qMebfqV_vUBJtcvoV2z-a1pRvM0foZ2ehKXhpg,8098
|
|
200
203
|
re_common/v2/baselibrary/tools/data_processer/data_writer.py,sha256=OgKZ06zRJYNx758rbjxZG_KNgkLuVLlyB1AvyRsJtS4,1447
|
|
201
204
|
re_common/v2/baselibrary/tools/tree_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
202
205
|
re_common/v2/baselibrary/tools/tree_processor/builder.py,sha256=-4e8G8BVcD9kgbEjL2TVdymjH7txj07rcI1izWtf9hc,831
|
|
203
206
|
re_common/v2/baselibrary/tools/tree_processor/node.py,sha256=_nfnqazid2wJvMmpHBsGVG9I4ggTrv7eLKrbhXbxf2c,326
|
|
204
207
|
re_common/v2/baselibrary/utils/BusinessStringUtil.py,sha256=njPcRgeBWpnZr5u2cPAO4qdWBq-CgTn99rJuvWFcChk,6788
|
|
205
208
|
re_common/v2/baselibrary/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
206
|
-
re_common/v2/baselibrary/utils/api_net_utils.py,sha256=
|
|
209
|
+
re_common/v2/baselibrary/utils/api_net_utils.py,sha256=68aFDfNRqjLiRepFg0EG0ZJwBEs4QE9S3uB7h4nSwNU,10758
|
|
207
210
|
re_common/v2/baselibrary/utils/author_smi.py,sha256=Fh276u7za-GF_tK9xpBFYF5q1E3_tX22ZouWC8U7w8o,13831
|
|
208
211
|
re_common/v2/baselibrary/utils/base_string_similarity.py,sha256=a40a79ttwoG_gC_hxMNB-sMXXecgICoRDWrj0DW8iEE,7749
|
|
209
212
|
re_common/v2/baselibrary/utils/basedict.py,sha256=sH3_RZ8u4649-jX2V1uKNNkjJVUijZBDp6SdqncOZ88,1583
|
|
@@ -215,7 +218,8 @@ re_common/v2/baselibrary/utils/elasticsearch.py,sha256=USjMAYAhpNQLs-dGjQgwPZNl_
|
|
|
215
218
|
re_common/v2/baselibrary/utils/json_cls.py,sha256=M93piYtmgm_wP8E57culTrd_AhHLoGg6PqeAJYdW2SM,438
|
|
216
219
|
re_common/v2/baselibrary/utils/mq.py,sha256=UHpO8iNIHs91Tgp-BgnSUpZwjWquxrGLdpr3FMMv2zw,2858
|
|
217
220
|
re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
|
|
218
|
-
re_common/v2/baselibrary/utils/
|
|
221
|
+
re_common/v2/baselibrary/utils/pinyin_utils.py,sha256=OXZfVvMjKfCvEbJ6PIwpwWbupU1CSBXJNDnf3jMhC10,7141
|
|
222
|
+
re_common/v2/baselibrary/utils/string_bool.py,sha256=FsmrUZw1CCUXrfC9ZejdgKO35320wWffiduAGhl_bFQ,6412
|
|
219
223
|
re_common/v2/baselibrary/utils/string_clear.py,sha256=Ympa0Cs2y_72QeeyMS8de8y_QgtEFJJQ0AgHnylbMUc,7861
|
|
220
224
|
re_common/v2/baselibrary/utils/string_smi.py,sha256=cU0WAWHRGnGoVQx3eCEKeM_q_olFNzRTJe7rSe586SY,741
|
|
221
225
|
re_common/v2/baselibrary/utils/stringutils.py,sha256=hH0pHNvgR_TgulmBPRax9U_sp6bwYG5ksDbdqHRCFvk,10083
|
|
@@ -245,8 +249,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
|
|
|
245
249
|
re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
|
|
246
250
|
re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
|
|
247
251
|
re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
|
|
248
|
-
re_common-10.0.
|
|
249
|
-
re_common-10.0.
|
|
250
|
-
re_common-10.0.
|
|
251
|
-
re_common-10.0.
|
|
252
|
-
re_common-10.0.
|
|
252
|
+
re_common-10.0.43.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
253
|
+
re_common-10.0.43.dist-info/METADATA,sha256=wLSvL6sw37eaQmsTV3OYThFy_oqzN4Bx1qcXw36SbNQ,787
|
|
254
|
+
re_common-10.0.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
255
|
+
re_common-10.0.43.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
|
|
256
|
+
re_common-10.0.43.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|