re-common 10.0.17__py3-none-any.whl → 10.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +4 -5
- {re_common-10.0.17.dist-info → re_common-10.0.19.dist-info}/METADATA +10 -2
- {re_common-10.0.17.dist-info → re_common-10.0.19.dist-info}/RECORD +6 -6
- {re_common-10.0.17.dist-info → re_common-10.0.19.dist-info}/WHEEL +1 -1
- {re_common-10.0.17.dist-info → re_common-10.0.19.dist-info}/LICENSE +0 -0
- {re_common-10.0.17.dist-info → re_common-10.0.19.dist-info}/top_level.txt +0 -0
|
@@ -109,13 +109,12 @@ class HDFSDataProcessor:
|
|
|
109
109
|
retry_count = 0
|
|
110
110
|
while retry_count < self.retry_limit:
|
|
111
111
|
try:
|
|
112
|
-
await process_func(data)
|
|
113
|
-
return # 成功处理后退出
|
|
112
|
+
return await process_func(data) # 成功处理后退出
|
|
114
113
|
except Exception as e:
|
|
115
114
|
retry_count += 1
|
|
116
115
|
print(f"处理数据时发生错误: {e}, 正在重试 {retry_count}/{self.retry_limit}, data: {data}")
|
|
117
116
|
await asyncio.sleep(2**retry_count)
|
|
118
|
-
|
|
117
|
+
raise Exception(f"处理数据失败, 达到重试上限, data: {data}")
|
|
119
118
|
|
|
120
119
|
async def process_file(self, hdfs_file_path, process_func, write_dir: str):
|
|
121
120
|
"""处理单个 gz 文件"""
|
|
@@ -133,7 +132,7 @@ class HDFSDataProcessor:
|
|
|
133
132
|
data = json.loads(line)
|
|
134
133
|
tasks.append(self.process_data(data, process_func))
|
|
135
134
|
except json.JSONDecodeError as e:
|
|
136
|
-
|
|
135
|
+
raise Exception(f"解析JSON失败: {e}, 行内容: {line.strip()}")
|
|
137
136
|
|
|
138
137
|
# await AsyncTaskPool(self.batch_size).run(tasks) # AsyncTaskPool 适用于一次提交所有任务, 限制并发数执行
|
|
139
138
|
results.extend(await asyncio.gather(*tasks))
|
|
@@ -237,7 +236,7 @@ class HDFSDataProcessor:
|
|
|
237
236
|
data = json.loads(line)
|
|
238
237
|
batch_data.append(data)
|
|
239
238
|
except json.JSONDecodeError as e:
|
|
240
|
-
|
|
239
|
+
raise Exception(f"解析JSON失败: {e}, 行内容: {line.strip()}")
|
|
241
240
|
|
|
242
241
|
# 处理读取到的批次数据
|
|
243
242
|
if batch_data:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: re_common
|
|
3
|
-
Version: 10.0.17
|
|
3
|
+
Version: 10.0.19
|
|
4
4
|
Summary: a library about all python projects
|
|
5
5
|
Home-page: https://gitee.com/xujiangios/re-common
|
|
6
6
|
Author: vic
|
|
@@ -11,6 +11,14 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.6
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: author-email
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: requires-python
|
|
21
|
+
Dynamic: summary
|
|
14
22
|
|
|
15
23
|
|
|
16
24
|
这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
|
|
@@ -173,7 +173,7 @@ re_common/v2/baselibrary/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
173
173
|
re_common/v2/baselibrary/tools/ac_ahocorasick.py,sha256=c63y5RtKVLD37nyPCnBqfNygwRj4gTQqyIdDOrC65G0,2847
|
|
174
174
|
re_common/v2/baselibrary/tools/dict_tools.py,sha256=BTh7oJuJ619IZgxiYlim0ltrXBclDtb7WzyFGr7wVf0,1246
|
|
175
175
|
re_common/v2/baselibrary/tools/dolphinscheduler.py,sha256=1m7UGYDiuvJUCI6ik6CGM2fO8U5XteJzn55VRbwB9ts,7978
|
|
176
|
-
re_common/v2/baselibrary/tools/hdfs_data_processer.py,sha256=
|
|
176
|
+
re_common/v2/baselibrary/tools/hdfs_data_processer.py,sha256=g0DaNjXM1hIUblFQ6YBwnwEBKIXn48X8Y9Eiok4dVlQ,14824
|
|
177
177
|
re_common/v2/baselibrary/tools/list_tools.py,sha256=mZyrOGdW6tuany0lKQOD4P739xikvmeKm1VSzo37Byc,1973
|
|
178
178
|
re_common/v2/baselibrary/tools/search_hash_tools.py,sha256=2ENLtZE8opRsfkwRtTNMzITmpTsjO7wZ1ZkfkqpOH9U,1937
|
|
179
179
|
re_common/v2/baselibrary/tools/text_matcher.py,sha256=cPMoFxaA0-ce3tLRxVSs8_3pTYS1oVIHDnNy_AlPU-4,10756
|
|
@@ -219,8 +219,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
|
|
|
219
219
|
re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
|
|
220
220
|
re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
|
|
221
221
|
re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
|
|
222
|
-
re_common-10.0.
|
|
223
|
-
re_common-10.0.
|
|
224
|
-
re_common-10.0.
|
|
225
|
-
re_common-10.0.
|
|
226
|
-
re_common-10.0.
|
|
222
|
+
re_common-10.0.19.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
223
|
+
re_common-10.0.19.dist-info/METADATA,sha256=HmlwedZs6uIg94rFxGDllhiPs6eRSk_NtjdcHmbZ3Sg,764
|
|
224
|
+
re_common-10.0.19.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
225
|
+
re_common-10.0.19.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
|
|
226
|
+
re_common-10.0.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|