re-common 10.0.43__py3-none-any.whl → 10.0.44__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- re_common/v2/baselibrary/tools/data_processer/data_processer.py +1 -1
- re_common/v2/baselibrary/tools/hdfs_base_processor.py +3 -1
- re_common/v2/baselibrary/utils/db.py +1 -27
- re_common/v2/baselibrary/utils/string_bool.py +7 -4
- {re_common-10.0.43.dist-info → re_common-10.0.44.dist-info}/METADATA +1 -1
- {re_common-10.0.43.dist-info → re_common-10.0.44.dist-info}/RECORD +9 -9
- {re_common-10.0.43.dist-info → re_common-10.0.44.dist-info}/WHEEL +0 -0
- {re_common-10.0.43.dist-info → re_common-10.0.44.dist-info}/licenses/LICENSE +0 -0
- {re_common-10.0.43.dist-info → re_common-10.0.44.dist-info}/top_level.txt +0 -0
|
@@ -34,6 +34,7 @@ class HDFSBaseProcessor(abc.ABC):
|
|
|
34
34
|
read_mode: Literal["all", "stream"] = "all",
|
|
35
35
|
retries: int = 3,
|
|
36
36
|
pool_factory: Callable[[], Awaitable[Any]] = None,
|
|
37
|
+
pool_factory_kwargs: dict = None,
|
|
37
38
|
max_processes: int = None, # 添加多进程支持
|
|
38
39
|
result_dir: str = None,
|
|
39
40
|
):
|
|
@@ -48,6 +49,7 @@ class HDFSBaseProcessor(abc.ABC):
|
|
|
48
49
|
self.retries = retries
|
|
49
50
|
self.result_dir = result_dir
|
|
50
51
|
self.pool_factory = pool_factory
|
|
52
|
+
self.pool_factory_kwargs = pool_factory_kwargs
|
|
51
53
|
self.max_processes = max_processes or max(multiprocessing.cpu_count() - 1, 1) # 默认使用CPU核心数-1
|
|
52
54
|
self._client = None
|
|
53
55
|
|
|
@@ -64,7 +66,7 @@ class HDFSBaseProcessor(abc.ABC):
|
|
|
64
66
|
return None
|
|
65
67
|
global _pool
|
|
66
68
|
if _pool is None:
|
|
67
|
-
_pool = await self.pool_factory()
|
|
69
|
+
_pool = await self.pool_factory(self.pool_factory_kwargs)
|
|
68
70
|
return _pool
|
|
69
71
|
|
|
70
72
|
def _list_gz_files(self) -> List[str]:
|
|
@@ -9,33 +9,7 @@ from collections import namedtuple
|
|
|
9
9
|
|
|
10
10
|
from aiomysql import Pool, Connection, Cursor
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
"host": "192.168.98.64",
|
|
14
|
-
"port": 4000,
|
|
15
|
-
"user": "dataware_house_baseUser",
|
|
16
|
-
"password": "FF19AF831AEBD580B450B16BF9264200",
|
|
17
|
-
"db": "dataware_house_base",
|
|
18
|
-
"charset": "utf8mb4",
|
|
19
|
-
"minsize": 16, # 最小连接数
|
|
20
|
-
"maxsize": 128, # 最大连接数
|
|
21
|
-
"autocommit": False, # 自动提交事务
|
|
22
|
-
"pool_recycle": 3600, # 每个连接的回收时间(秒),超过此时间后连接将被关闭并重新创建,避免失效连接
|
|
23
|
-
"echo": False, # 打印SQL语句
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
DB_CONFIG1 = {
|
|
27
|
-
"host": "192.168.98.64",
|
|
28
|
-
"port": 4000,
|
|
29
|
-
"user": "foreign_fulltextUser",
|
|
30
|
-
"password": "i4hIeasw1qpmhGN2nwL7",
|
|
31
|
-
"db": "foreign_fulltext",
|
|
32
|
-
"charset": "utf8mb4",
|
|
33
|
-
"minsize": 16, # 最小连接数
|
|
34
|
-
"maxsize": 128, # 最大连接数
|
|
35
|
-
"autocommit": False, # 自动提交事务
|
|
36
|
-
"pool_recycle": 3600, # 每个连接的回收时间(秒),超过此时间后连接将被关闭并重新创建,避免失效连接
|
|
37
|
-
"echo": False, # 打印SQL语句
|
|
38
|
-
}
|
|
12
|
+
|
|
39
13
|
|
|
40
14
|
|
|
41
15
|
async def get_pool_only(_DB_CONFIG: dict = None):
|
|
@@ -3,6 +3,8 @@ import re
|
|
|
3
3
|
import regex
|
|
4
4
|
import unicodedata
|
|
5
5
|
|
|
6
|
+
from re_common.v2.baselibrary.decorators.utils import deprecated
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
def is_ascii_alnum(char: str) -> bool:
|
|
8
10
|
# 精准判断是否为英文或数字字符(ASCII 范围)
|
|
@@ -122,16 +124,17 @@ def is_all_symbols(text):
|
|
|
122
124
|
# 检查每个字符是否属于符号类别
|
|
123
125
|
return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
|
|
124
126
|
|
|
125
|
-
def is_whole_word_en_re(
|
|
127
|
+
def is_whole_word_en_re(sub_str: str, long_str: str) -> bool:
|
|
126
128
|
"""
|
|
127
129
|
与 is_whole_word_en 效果一致
|
|
128
130
|
"""
|
|
129
|
-
if not
|
|
131
|
+
if not sub_str or not long_str:
|
|
130
132
|
return False
|
|
131
133
|
|
|
132
|
-
pattern = rf"(^|[^a-z0-9-]){re.escape(
|
|
133
|
-
return re.search(pattern,
|
|
134
|
+
pattern = rf"(^|[^a-z0-9-]){re.escape(sub_str)}([^a-z0-9-]|$)"
|
|
135
|
+
return re.search(pattern, long_str) is not None
|
|
134
136
|
|
|
137
|
+
@deprecated("请使用 is_whole_word_en_re 中的方法代替。")
|
|
135
138
|
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
|
|
136
139
|
regex_pattern = re.compile(r"[^a-z0-9-]", re.IGNORECASE)
|
|
137
140
|
|
|
@@ -187,7 +187,7 @@ re_common/v2/baselibrary/tools/concurrency.py,sha256=ctKBoeEbq1SGmhPp7oVR_QSXTKV
|
|
|
187
187
|
re_common/v2/baselibrary/tools/dict_tools.py,sha256=eSMwPTLp3oSjuviC_wlXg0I-dnkkmZfUfCRLX5djWV8,1365
|
|
188
188
|
re_common/v2/baselibrary/tools/dir_file_tools.py,sha256=0ZohO8ZCLjuwx1zh4fY1Zcxv9lBAGJamH_B3ZCFO5AI,801
|
|
189
189
|
re_common/v2/baselibrary/tools/dolphinscheduler.py,sha256=1m7UGYDiuvJUCI6ik6CGM2fO8U5XteJzn55VRbwB9ts,7978
|
|
190
|
-
re_common/v2/baselibrary/tools/hdfs_base_processor.py,sha256=
|
|
190
|
+
re_common/v2/baselibrary/tools/hdfs_base_processor.py,sha256=Gv4YR9JzAlPvGysFNvcS7RgG5skLM05rWpJGu0jbLLU,8869
|
|
191
191
|
re_common/v2/baselibrary/tools/hdfs_bulk_processor.py,sha256=8FjuZbcBXC_27zEQonLKvb0fMGwcIH9MPRTOmdMAuLU,2396
|
|
192
192
|
re_common/v2/baselibrary/tools/hdfs_data_processer.py,sha256=g0DaNjXM1hIUblFQ6YBwnwEBKIXn48X8Y9Eiok4dVlQ,14824
|
|
193
193
|
re_common/v2/baselibrary/tools/hdfs_line_processor.py,sha256=h1J_mOPoNvsjw7zYMsD7rr0Q6bXvVzo9tRJZVAbei1s,2732
|
|
@@ -198,7 +198,7 @@ re_common/v2/baselibrary/tools/text_matcher.py,sha256=cPMoFxaA0-ce3tLRxVSs8_3pTY
|
|
|
198
198
|
re_common/v2/baselibrary/tools/unionfind_tools.py,sha256=VYHZZPXwBYljsm7TjV1B6iCgDn3O3btzNf9hMvQySVU,2965
|
|
199
199
|
re_common/v2/baselibrary/tools/data_processer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
200
200
|
re_common/v2/baselibrary/tools/data_processer/base.py,sha256=1cVDcznux06SlLKINyjjN7yYn3ugH7XirXe_bFjdah0,1734
|
|
201
|
-
re_common/v2/baselibrary/tools/data_processer/data_processer.py,sha256=
|
|
201
|
+
re_common/v2/baselibrary/tools/data_processer/data_processer.py,sha256=Z3iESmdrRugtlv6fBNYTSVIiwblnv4IkKN2RwfWFtRA,22253
|
|
202
202
|
re_common/v2/baselibrary/tools/data_processer/data_reader.py,sha256=nYXh1qMebfqV_vUBJtcvoV2z-a1pRvM0foZ2ehKXhpg,8098
|
|
203
203
|
re_common/v2/baselibrary/tools/data_processer/data_writer.py,sha256=OgKZ06zRJYNx758rbjxZG_KNgkLuVLlyB1AvyRsJtS4,1447
|
|
204
204
|
re_common/v2/baselibrary/tools/tree_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -213,13 +213,13 @@ re_common/v2/baselibrary/utils/basedict.py,sha256=sH3_RZ8u4649-jX2V1uKNNkjJVUijZ
|
|
|
213
213
|
re_common/v2/baselibrary/utils/basehdfs.py,sha256=wwvk4kvipD-AVWCF0WRzBTM2q2wAvQtanLK-Hjp8FOk,6026
|
|
214
214
|
re_common/v2/baselibrary/utils/basepika.py,sha256=ifOb3UsGj79k40aD9UK6-5BMPw43ZAo0SO3AYD4q4vw,7332
|
|
215
215
|
re_common/v2/baselibrary/utils/basetime.py,sha256=w87FF5adZ12oJfV3oAnN7sr8iNThkgJqT3RsDtWPLmU,3729
|
|
216
|
-
re_common/v2/baselibrary/utils/db.py,sha256=
|
|
216
|
+
re_common/v2/baselibrary/utils/db.py,sha256=zGSXwZAMDllvccnw-hx71XpkFPEsS07m-Spk-dG6PJ8,4352
|
|
217
217
|
re_common/v2/baselibrary/utils/elasticsearch.py,sha256=USjMAYAhpNQLs-dGjQgwPZNl_g2V22aRyaWDMFOp58c,1357
|
|
218
218
|
re_common/v2/baselibrary/utils/json_cls.py,sha256=M93piYtmgm_wP8E57culTrd_AhHLoGg6PqeAJYdW2SM,438
|
|
219
219
|
re_common/v2/baselibrary/utils/mq.py,sha256=UHpO8iNIHs91Tgp-BgnSUpZwjWquxrGLdpr3FMMv2zw,2858
|
|
220
220
|
re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
|
|
221
221
|
re_common/v2/baselibrary/utils/pinyin_utils.py,sha256=OXZfVvMjKfCvEbJ6PIwpwWbupU1CSBXJNDnf3jMhC10,7141
|
|
222
|
-
re_common/v2/baselibrary/utils/string_bool.py,sha256=
|
|
222
|
+
re_common/v2/baselibrary/utils/string_bool.py,sha256=pCLxlJgXSSqnxJziE5lekkRXzeOkfniUq3yQ-WDgO-4,6551
|
|
223
223
|
re_common/v2/baselibrary/utils/string_clear.py,sha256=Ympa0Cs2y_72QeeyMS8de8y_QgtEFJJQ0AgHnylbMUc,7861
|
|
224
224
|
re_common/v2/baselibrary/utils/string_smi.py,sha256=cU0WAWHRGnGoVQx3eCEKeM_q_olFNzRTJe7rSe586SY,741
|
|
225
225
|
re_common/v2/baselibrary/utils/stringutils.py,sha256=hH0pHNvgR_TgulmBPRax9U_sp6bwYG5ksDbdqHRCFvk,10083
|
|
@@ -249,8 +249,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
|
|
|
249
249
|
re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
|
|
250
250
|
re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
|
|
251
251
|
re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
|
|
252
|
-
re_common-10.0.
|
|
253
|
-
re_common-10.0.
|
|
254
|
-
re_common-10.0.
|
|
255
|
-
re_common-10.0.
|
|
256
|
-
re_common-10.0.
|
|
252
|
+
re_common-10.0.44.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
253
|
+
re_common-10.0.44.dist-info/METADATA,sha256=WEvuEGO-2wJY144wQZ4whteTVYfdilKpJi9JCvbXnEI,787
|
|
254
|
+
re_common-10.0.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
255
|
+
re_common-10.0.44.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
|
|
256
|
+
re_common-10.0.44.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|