re-common 10.0.35__py3-none-any.whl → 10.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/v2/baselibrary/utils/db.py +54 -0
- re_common/v2/baselibrary/utils/stringutils.py +15 -5
- {re_common-10.0.35.dist-info → re_common-10.0.37.dist-info}/METADATA +1 -1
- {re_common-10.0.35.dist-info → re_common-10.0.37.dist-info}/RECORD +7 -7
- {re_common-10.0.35.dist-info → re_common-10.0.37.dist-info}/LICENSE +0 -0
- {re_common-10.0.35.dist-info → re_common-10.0.37.dist-info}/WHEEL +0 -0
- {re_common-10.0.35.dist-info → re_common-10.0.37.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import time
|
|
3
|
+
|
|
2
4
|
import aiomysql
|
|
3
5
|
import asyncio
|
|
4
6
|
from contextlib import asynccontextmanager
|
|
@@ -92,6 +94,7 @@ aiomysql_pool = None
|
|
|
92
94
|
pool_lock = asyncio.Lock() # 全局异步锁
|
|
93
95
|
|
|
94
96
|
|
|
97
|
+
|
|
95
98
|
async def init_aiomysql_pool_async():
|
|
96
99
|
global aiomysql_pool
|
|
97
100
|
if aiomysql_pool is None:
|
|
@@ -100,3 +103,54 @@ async def init_aiomysql_pool_async():
|
|
|
100
103
|
print(f"[{os.getpid()}] Initializing aiomysql pool...")
|
|
101
104
|
aiomysql_pool = await aiomysql.create_pool(**DB_CONFIG)
|
|
102
105
|
return aiomysql_pool
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
motor_fs = None
|
|
109
|
+
client = None
|
|
110
|
+
motor_fs_lock = asyncio.Lock() # 全局异步锁
|
|
111
|
+
_loop_id_mongo = None
|
|
112
|
+
|
|
113
|
+
async def check_connection(client):
    """Report whether *client* answers a MongoDB ``ping`` command.

    Args:
        client: Object whose ``admin.command`` is awaitable (this module
            passes the client built by ``init_motor_async``), or ``None``
            when no client has been created yet.

    Returns:
        bool: ``True`` if the ping completed, ``False`` if there is no client
        or the ping raised any ordinary exception.
    """
    print("check mongodb client ping")
    if client is None:
        # No client exists yet; say so explicitly instead of relying on an
        # AttributeError from ``None.admin`` being swallowed below.
        return False
    try:
        await client.admin.command("ping")
    except Exception:
        # Any failure to ping is reported as "not connected"; callers decide
        # whether to rebuild the client.
        return False
    return True
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
    """Return the shared Motor GridFS bucket and client, creating them lazily.

    The bucket and client are kept in the module globals ``motor_fs`` and
    ``client``. They are (re)built when no bucket exists yet, or when a
    connection check was requested and the ping failed.

    Args:
        uri: MongoDB connection URI passed to ``AsyncIOMotorClient``.
        db_name: Name of the database looked up on the client.
        bucket_name: GridFS bucket name passed to ``AsyncIOMotorGridFSBucket``.
        is_reload: When true, ping the current client first and rebuild if
            the ping fails. Forced to true when the running event loop
            differs from the one recorded at the last build.

    Returns:
        tuple: ``(motor_fs, client)`` as currently stored in the globals.
    """
    global motor_fs, client, _loop_id_mongo

    # A client recorded under a different event loop is treated like an
    # explicit reload request, so it gets pinged before being reused.
    if _loop_id_mongo is not None:
        if id(asyncio.get_running_loop()) != _loop_id_mongo:
            is_reload = True

    # Remember which client the ping result refers to, so that the re-check
    # under the lock does not act on a verdict about an already-replaced one.
    pinged_client = client

    # Avoid pinging on every call: the connection is only checked on reload.
    is_ping = True
    if is_reload:
        is_ping = await check_connection(pinged_client)

    if motor_fs is None or not is_ping:
        async with motor_fs_lock:
            # Another coroutine may have rebuilt everything while we waited
            # for the lock. A failed ping only justifies a rebuild if the
            # global client is still the one that failed it.
            needs_rebuild = motor_fs is None or (
                not is_ping and client is pinged_client
            )
            if needs_rebuild:
                print(f"[{os.getpid()}] Initializing motor_fs...")
                from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
                # NOTE(review): the previous client is not closed here because
                # earlier callers may still hold a reference to it - confirm
                # whether it should be closed explicitly.
                client = AsyncIOMotorClient(uri)
                db = client[db_name]
                motor_fs = AsyncIOMotorGridFSBucket(database=db, bucket_name=bucket_name)
                _loop_id_mongo = id(asyncio.get_running_loop())
    return motor_fs, client
|
|
144
|
+
|
|
145
|
+
# async def run_main():
|
|
146
|
+
# while True:
|
|
147
|
+
# uri = "mongodb://192.168.98.80:27001/wpdc"
|
|
148
|
+
# db_name = "wpdc"
|
|
149
|
+
# bucket_name = "sci_doc"
|
|
150
|
+
# motor_fs, client = await init_motor_async(uri, db_name, bucket_name,is_reload=True)
|
|
151
|
+
# # print(await check_connection(client))
|
|
152
|
+
# time.sleep(3)
|
|
153
|
+
#
|
|
154
|
+
#
|
|
155
|
+
# if __name__ == "__main__":
|
|
156
|
+
# asyncio.run(run_main())
|
|
@@ -179,12 +179,22 @@ class HTMLTextExtractor(HTMLParser):
|
|
|
179
179
|
# parser.close()
|
|
180
180
|
# return parser.get_text()
|
|
181
181
|
|
|
182
|
+
# def clean_html(html):
|
|
183
|
+
# """使用 Parsel 提取 HTML 中的纯文本"""
|
|
184
|
+
# sel = Selector(text=html, type='html')
|
|
185
|
+
# # 提取所有文本(包括子元素的文本)
|
|
186
|
+
# text = sel.xpath("string()").getall()
|
|
187
|
+
# return "".join(text).strip()
|
|
188
|
+
|
|
189
|
+
|
|
182
190
|
def clean_html(html):
    """Extract the plain text of an HTML document with BeautifulSoup.

    The ``lxml`` parser is tried first. If building the soup or extracting
    the text raises, the same input is parsed again with ``html5lib``.

    Args:
        html: Markup to parse; passed unchanged to ``BeautifulSoup``.

    Returns:
        The result of ``BeautifulSoup.get_text()`` for the parsed markup.

    Raises:
        Exception: Whatever the ``html5lib`` fallback raises if it fails too.
    """
    from bs4 import BeautifulSoup

    try:
        return BeautifulSoup(html, "lxml").get_text()
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must propagate instead of silently starting a re-parse.
        return BeautifulSoup(html, "html5lib").get_text()
|
|
188
198
|
|
|
189
199
|
|
|
190
200
|
def remove_spaces_between_chinese_characters(text):
|
|
@@ -207,14 +207,14 @@ re_common/v2/baselibrary/utils/basedict.py,sha256=sH3_RZ8u4649-jX2V1uKNNkjJVUijZ
|
|
|
207
207
|
re_common/v2/baselibrary/utils/basehdfs.py,sha256=wwvk4kvipD-AVWCF0WRzBTM2q2wAvQtanLK-Hjp8FOk,6026
|
|
208
208
|
re_common/v2/baselibrary/utils/basepika.py,sha256=ifOb3UsGj79k40aD9UK6-5BMPw43ZAo0SO3AYD4q4vw,7332
|
|
209
209
|
re_common/v2/baselibrary/utils/basetime.py,sha256=b7U_ho6nE3fjYBxSkdMHXUOd3ClH6KkW_7p7l2Gs4gA,3038
|
|
210
|
-
re_common/v2/baselibrary/utils/db.py,sha256=
|
|
210
|
+
re_common/v2/baselibrary/utils/db.py,sha256=5IOYOOGe6mNKhOeaCAq0iif5yJdDotBmrsMZmT1syjs,4862
|
|
211
211
|
re_common/v2/baselibrary/utils/json_cls.py,sha256=M93piYtmgm_wP8E57culTrd_AhHLoGg6PqeAJYdW2SM,438
|
|
212
212
|
re_common/v2/baselibrary/utils/mq.py,sha256=UHpO8iNIHs91Tgp-BgnSUpZwjWquxrGLdpr3FMMv2zw,2858
|
|
213
213
|
re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
|
|
214
214
|
re_common/v2/baselibrary/utils/string_bool.py,sha256=vxnjSFOfuHWGxkqaIbUNn21opx5tfV1uCXSahFfp1mU,6197
|
|
215
215
|
re_common/v2/baselibrary/utils/string_clear.py,sha256=Ympa0Cs2y_72QeeyMS8de8y_QgtEFJJQ0AgHnylbMUc,7861
|
|
216
216
|
re_common/v2/baselibrary/utils/string_smi.py,sha256=cU0WAWHRGnGoVQx3eCEKeM_q_olFNzRTJe7rSe586SY,741
|
|
217
|
-
re_common/v2/baselibrary/utils/stringutils.py,sha256=
|
|
217
|
+
re_common/v2/baselibrary/utils/stringutils.py,sha256=KnvKKmYafz-NbqnQq8iKXxQtA1xpHKaYzKcZex8yuM0,8397
|
|
218
218
|
re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
219
219
|
re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
|
|
220
220
|
re_common/vip/baseencodeid.py,sha256=nERoe89ueFM52bG7xwJdflcZHk6T2RQQKbc5uUZc3RM,3272
|
|
@@ -241,8 +241,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
|
|
|
241
241
|
re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
|
|
242
242
|
re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
|
|
243
243
|
re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
|
|
244
|
-
re_common-10.0.
|
|
245
|
-
re_common-10.0.
|
|
246
|
-
re_common-10.0.
|
|
247
|
-
re_common-10.0.
|
|
248
|
-
re_common-10.0.
|
|
244
|
+
re_common-10.0.37.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
245
|
+
re_common-10.0.37.dist-info/METADATA,sha256=Yg4jmi3x2Z1aU5nkOIhjtsvzNI4ZnDp3YwdgBYu8IY8,582
|
|
246
|
+
re_common-10.0.37.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
247
|
+
re_common-10.0.37.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
|
|
248
|
+
re_common-10.0.37.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|