re-common 10.0.35__py3-none-any.whl → 10.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  import os
2
+ import time
3
+
2
4
  import aiomysql
3
5
  import asyncio
4
6
  from contextlib import asynccontextmanager
@@ -92,6 +94,7 @@ aiomysql_pool = None
92
94
  pool_lock = asyncio.Lock() # global async lock
93
95
 
94
96
 
97
+
95
98
  async def init_aiomysql_pool_async():
96
99
  global aiomysql_pool
97
100
  if aiomysql_pool is None:
@@ -100,3 +103,54 @@ async def init_aiomysql_pool_async():
100
103
  print(f"[{os.getpid()}] Initializing aiomysql pool...")
101
104
  aiomysql_pool = await aiomysql.create_pool(**DB_CONFIG)
102
105
  return aiomysql_pool
106
+
107
+
108
+ motor_fs = None # cached GridFS bucket; assigned by init_motor_async
109
+ client = None # cached Motor client; assigned by init_motor_async
110
+ motor_fs_lock = asyncio.Lock() # global async lock; init_motor_async holds it while (re)building the cache
111
+ _loop_id_mongo = None # id() of the event loop that was running when the cache was last built
112
+
113
async def check_connection(client):
    """Report whether the MongoDB server behind *client* answers a ``ping``.

    Args:
        client: Object whose ``admin.command`` can be awaited (this module
            passes its cached Motor client). ``None`` is accepted and is
            reported as "not connected".

    Returns:
        ``True`` when the ``ping`` command completes, ``False`` when
        *client* is ``None`` or the command raises.
    """
    if client is None:
        # Nothing has been created yet, so there is nothing to ping. Handle
        # this explicitly instead of relying on a swallowed AttributeError.
        return False
    try:
        print("check mongodb client ping")
        await client.admin.command("ping")
    except Exception:
        # Best-effort probe: any failure simply means "not usable".
        return False
    return True
120
+
121
+
122
async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
    """Return the cached Motor GridFS bucket and client, building them on demand.

    The pair lives in the module globals ``motor_fs`` and ``client``. It is
    (re)built when no bucket exists yet, or when a reload is in effect and
    the cached client fails a ping. A reload is in effect when the caller
    passes ``is_reload=True`` or when the running event loop differs from
    the one recorded at the last build.

    Args:
        uri: Connection string handed to ``AsyncIOMotorClient``.
        db_name: Name used to look up the database on the client.
        bucket_name: Bucket name handed to ``AsyncIOMotorGridFSBucket``.
        is_reload: When true, ping the cached client and rebuild if the
            ping fails.

    Returns:
        Tuple ``(motor_fs, client)``.
    """
    global motor_fs, client, _loop_id_mongo

    # A cache built under a different event loop is treated like an explicit
    # reload request.
    if _loop_id_mongo is not None:
        if id(asyncio.get_running_loop()) != _loop_id_mongo:
            is_reload = True

    # Remember which client this call inspected, so the re-check under the
    # lock can tell whether someone else already replaced it.
    inspected_client = client

    # Avoid pinging on every call: only check the connection on reload, and
    # only when there is a cached client to check.
    needs_rebuild = motor_fs is None
    if is_reload and not needs_rebuild:
        needs_rebuild = not await check_connection(inspected_client)

    if needs_rebuild:
        async with motor_fs_lock:
            # Re-check under the lock. A concurrent caller may have rebuilt
            # the cache while this one was waiting; in that case ``client``
            # is no longer the object judged stale above and is reused.
            if motor_fs is None or client is inspected_client:
                print(f"[{os.getpid()}] Initializing motor_fs...")
                from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
                # NOTE(review): the replaced client is not closed here because
                # earlier callers may still hold a reference to it - confirm
                # whether it should be closed.
                client = AsyncIOMotorClient(uri)
                db = client[db_name]
                motor_fs = AsyncIOMotorGridFSBucket(database=db, bucket_name=bucket_name)
                _loop_id_mongo = id(asyncio.get_running_loop())
    return motor_fs, client
144
+
145
+ # async def run_main():
146
+ # while True:
147
+ # uri = "mongodb://192.168.98.80:27001/wpdc"
148
+ # db_name = "wpdc"
149
+ # bucket_name = "sci_doc"
150
+ # motor_fs, client = await init_motor_async(uri, db_name, bucket_name,is_reload=True)
151
+ # # print(await check_connection(client))
152
+ # time.sleep(3)
153
+ #
154
+ #
155
+ # if __name__ == "__main__":
156
+ # asyncio.run(run_main())
@@ -179,12 +179,22 @@ class HTMLTextExtractor(HTMLParser):
179
179
  # parser.close()
180
180
  # return parser.get_text()
181
181
 
182
+ # def clean_html(html):
183
+ # """使用 Parsel 提取 HTML 中的纯文本"""
184
+ # sel = Selector(text=html, type='html')
185
+ # # 提取所有文本(包括子元素的文本)
186
+ # text = sel.xpath("string()").getall()
187
+ # return "".join(text).strip()
188
+
189
+
182
190
  def clean_html(html):
183
- """使用 Parsel 提取 HTML 中的纯文本"""
184
- sel = Selector(text=html)
185
- # 提取所有文本(包括子元素的文本)
186
- text = sel.xpath("string()").getall()
187
- return "".join(text).strip()
191
+ from bs4 import BeautifulSoup
192
+ try:
193
+ soup = BeautifulSoup(html, "lxml")
194
+ return soup.get_text()
195
+ except:
196
+ soup = BeautifulSoup(html, "html5lib")
197
+ return soup.get_text()
188
198
 
189
199
 
190
200
  def remove_spaces_between_chinese_characters(text):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.35
3
+ Version: 10.0.37
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -207,14 +207,14 @@ re_common/v2/baselibrary/utils/basedict.py,sha256=sH3_RZ8u4649-jX2V1uKNNkjJVUijZ
207
207
  re_common/v2/baselibrary/utils/basehdfs.py,sha256=wwvk4kvipD-AVWCF0WRzBTM2q2wAvQtanLK-Hjp8FOk,6026
208
208
  re_common/v2/baselibrary/utils/basepika.py,sha256=ifOb3UsGj79k40aD9UK6-5BMPw43ZAo0SO3AYD4q4vw,7332
209
209
  re_common/v2/baselibrary/utils/basetime.py,sha256=b7U_ho6nE3fjYBxSkdMHXUOd3ClH6KkW_7p7l2Gs4gA,3038
210
- re_common/v2/baselibrary/utils/db.py,sha256=JktBem1IAsg7m-Wnsa-AT0Hj4bIkHyoKGBEDI1rL5Fc,3135
210
+ re_common/v2/baselibrary/utils/db.py,sha256=5IOYOOGe6mNKhOeaCAq0iif5yJdDotBmrsMZmT1syjs,4862
211
211
  re_common/v2/baselibrary/utils/json_cls.py,sha256=M93piYtmgm_wP8E57culTrd_AhHLoGg6PqeAJYdW2SM,438
212
212
  re_common/v2/baselibrary/utils/mq.py,sha256=UHpO8iNIHs91Tgp-BgnSUpZwjWquxrGLdpr3FMMv2zw,2858
213
213
  re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
214
214
  re_common/v2/baselibrary/utils/string_bool.py,sha256=vxnjSFOfuHWGxkqaIbUNn21opx5tfV1uCXSahFfp1mU,6197
215
215
  re_common/v2/baselibrary/utils/string_clear.py,sha256=Ympa0Cs2y_72QeeyMS8de8y_QgtEFJJQ0AgHnylbMUc,7861
216
216
  re_common/v2/baselibrary/utils/string_smi.py,sha256=cU0WAWHRGnGoVQx3eCEKeM_q_olFNzRTJe7rSe586SY,741
217
- re_common/v2/baselibrary/utils/stringutils.py,sha256=jUmtyo4qu-GmE7_Y8gqFcYPJ7kOo8MSwxEsT7vGp9_c,8131
217
+ re_common/v2/baselibrary/utils/stringutils.py,sha256=KnvKKmYafz-NbqnQq8iKXxQtA1xpHKaYzKcZex8yuM0,8397
218
218
  re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
219
219
  re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
220
220
  re_common/vip/baseencodeid.py,sha256=nERoe89ueFM52bG7xwJdflcZHk6T2RQQKbc5uUZc3RM,3272
@@ -241,8 +241,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
241
241
  re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
242
242
  re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
243
243
  re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
244
- re_common-10.0.35.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
245
- re_common-10.0.35.dist-info/METADATA,sha256=Uh894BDybODTMQemgCS-4hSIRVchegBTzUmBY7XZqRo,582
246
- re_common-10.0.35.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
247
- re_common-10.0.35.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
248
- re_common-10.0.35.dist-info/RECORD,,
244
+ re_common-10.0.37.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
245
+ re_common-10.0.37.dist-info/METADATA,sha256=Yg4jmi3x2Z1aU5nkOIhjtsvzNI4ZnDp3YwdgBYu8IY8,582
246
+ re_common-10.0.37.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
247
+ re_common-10.0.37.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
248
+ re_common-10.0.37.dist-info/RECORD,,