cppackage 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
CPpackage/db/sql_model.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import pymysql
2
2
  import time
3
- from pymysql import Error
4
3
  try:
5
4
  from .config import get_db_config
6
5
  except (ImportError, ValueError):
@@ -13,12 +12,8 @@ except (ImportError, ValueError):
13
12
 
14
13
  # ===================== 数据库连接 =====================
15
14
 
16
- def _get_connection(database=None, port=None):
17
- cfg = get_db_config()
18
-
19
- db = database if database else cfg.get('database')
20
- prt = port if port else cfg.get('port', 3306)
21
-
15
+ def _create_new_connection(cfg, db, prt):
16
+ """创建新的数据库连接"""
22
17
  return pymysql.connect(
23
18
  host=cfg.get('host'),
24
19
  user=cfg.get('user'),
@@ -30,6 +25,33 @@ def _get_connection(database=None, port=None):
30
25
  )
31
26
 
32
27
 
28
def _get_connection(database=None, port=None):
    """
    Open a fresh database connection and verify it with a ping.

    The connection is created through ``_create_new_connection`` and then
    checked with ``ping(reconnect=False)``. If the ping raises, the dead
    connection is closed (best effort) and exactly one replacement connection
    is created and returned without a further check.

    Args:
        database: Database name; falls back to the ``database`` entry of the
            configuration returned by ``get_db_config()`` when falsy.
        port: Server port; falls back to the configured ``port`` entry
            (default 3306) when falsy.

    Returns:
        The connection object produced by ``_create_new_connection``.

    Raises:
        Whatever ``_create_new_connection`` raises when the connection (or
        the replacement connection) cannot be established.
    """
    cfg = get_db_config()

    db = database or cfg.get('database')
    prt = port or cfg.get('port', 3306)

    conn = _create_new_connection(cfg, db, prt)

    # Health check: ping without auto-reconnect so a dead socket surfaces as
    # an exception and is replaced explicitly below.
    try:
        conn.ping(reconnect=False)
    except Exception as exc:
        # Report the failure instead of discarding it silently, so flaky
        # connections are visible in the output.
        print(f"连接健康检查失败,重新创建连接: {exc}")
        try:
            conn.close()
        except Exception:
            # Closing an already-broken connection is best effort only.
            pass
        conn = _create_new_connection(cfg, db, prt)

    return conn
53
+
54
+
33
55
  # ===================== 查询操作 =====================
34
56
 
35
57
  def sel_data(sql, params=None, port=None, database=None):
@@ -139,8 +161,7 @@ def find_duplicate_records(table_name, database, unique_index_fields, port=None)
139
161
  if not isinstance(unique_index_fields, list) or len(unique_index_fields) == 0:
140
162
  print("错误:unique_index_fields 必须是非空列表!")
141
163
  return None
142
-
143
- fields_str = ",".join([f"`{field}`" for field in unique_index_fields])
164
+
144
165
  group_by_str = ",".join([f"`{field}`" for field in unique_index_fields])
145
166
 
146
167
  # 核心SQL:查询所有重复记录
@@ -244,43 +265,110 @@ def delete_duplicate_records(table_name, database, unique_index_fields, port=Non
244
265
  return None
245
266
  # ===================== DataFrame 入库 =====================
246
267
 
247
- def update_datas(df, table_name, database):
268
+ def _is_connection_error(error_msg):
269
+ """
270
+ 判断是否是数据库连接相关错误
271
+ 返回 True 表示是连接错误,可以通过重试解决
272
+ """
273
+ connection_error_codes = [
274
+ '(0, \'\')', # pymysql InterfaceError: 连接已关闭
275
+ '2003', # Can't connect to MySQL server
276
+ '2006', # MySQL server has gone away
277
+ '2013', # Lost connection to MySQL server during query
278
+ '2014', # Commands out of sync
279
+ '1045', # Access denied
280
+ '2055', # Lost connection to MySQL server at 'xxx'
281
+ ]
282
+ return any(code in error_msg for code in connection_error_codes)
283
+
284
+
285
def update_datas(df, table_name, database, batch_size=20):
    """
    Upsert the rows of a DataFrame into a MySQL table, batch by batch.

    Each batch is sent as one ``INSERT ... ON DUPLICATE KEY UPDATE`` statement
    on its own connection and committed independently, so batches committed
    before a failure stay in the database.

    Retry policy per batch (at most 3 attempts in total):
    - Unknown-column error (1054): ``check_and_sync_columns`` is called and
      the batch is retried.
    - Connection error (as judged by ``_is_connection_error``): the batch is
      retried after an exponential backoff of 2s, then 4s.
    - Any other error is re-raised immediately.

    Args:
        df: DataFrame whose column names match the target table columns.
            The ``id`` column, if present, is inserted but never updated.
        table_name: Target table name.
        database: Database name passed to ``_get_connection``.
        batch_size: Number of rows per INSERT statement; must be >= 1.

    Returns:
        None. Progress and the accumulated ``cursor.rowcount`` are printed.
        NOTE(review): for ON DUPLICATE KEY UPDATE, MySQL's affected-row count
        is not a plain record count (an updated row may count as 2) - confirm
        whether the printed total is meant to be exact.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: When a batch still fails after the last attempt, or on
            any non-retryable database error.
    """
    if df.empty:
        print("数据为空,无需入库")
        return

    # A negative step would make range() empty and silently write nothing;
    # zero would fail later with an unrelated range() error.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )

    cols = list(df.columns)
    col_str = ",".join(f"`{c}`" for c in cols)
    value_tpl = "(" + ",".join(["%s"] * len(cols)) + ")"
    update_clause = ",".join(
        f"`{c}`=VALUES(`{c}`)" for c in cols if c != "id"
    )

    total = len(df)
    success_count = 0
    max_retries = 3  # total attempts per batch

    for start in range(0, total, batch_size):
        batch_df = df.iloc[start:start + batch_size]
        values_str = ",".join([value_tpl] * len(batch_df))
        sql = f"""
        INSERT INTO `{table_name}` ({col_str})
        VALUES {values_str}
        ON DUPLICATE KEY UPDATE {update_clause}
        """
        # One flat parameter list matching the placeholders row by row.
        flat_data = [v for row in batch_df.values for v in row]

        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            wait_time = 0
            conn = None
            try:
                # _get_connection already pings the new connection, so no
                # second health check is needed here.
                conn = _get_connection(database)
                with conn.cursor() as cursor:
                    cursor.execute(sql, flat_data)
                    conn.commit()
                    success_count += cursor.rowcount
                break  # batch done, leave the retry loop

            except Exception as e:
                error_str = str(e)
                if conn:
                    try:
                        conn.rollback()
                    except Exception:
                        # Rollback on a broken connection is best effort.
                        pass

                print(f"批次入库失败(start={start}, attempt={attempt+1}/{max_retries}): {e}")

                # Check the schema error first so that digits inside a column
                # name can never be mistaken for a connection error code.
                if '1054, "Unknown column' in error_str:
                    check_and_sync_columns(df, table_name, database)
                    if is_last_attempt:
                        raise Exception(f"字段同步后仍然失败: {e}") from e

                elif _is_connection_error(error_str):
                    if is_last_attempt:
                        raise Exception(f"数据库连接错误,已重试{max_retries}次: {e}") from e
                    # Exponential backoff: 2s after the 1st failure, 4s after
                    # the 2nd; the 3rd failure raises above.
                    wait_time = 2 * (2 ** attempt)
                    print(f"连接错误,等待{wait_time}秒后重试...")

                else:
                    # Not retryable: propagate the original exception.
                    raise

            finally:
                if conn:
                    try:
                        conn.close()
                    except Exception:
                        pass

            # Sleep only after the failed connection has been released.
            if wait_time:
                time.sleep(wait_time)

    print(f"成功插入/更新 {success_count} 条记录")
248
374
 
249
- conn = None
250
- for i in range(2):
251
- try:
252
- if df.empty:
253
- print("数据为空,无需入库")
254
- return
255
- conn = _get_connection(database)
256
- cursor = conn.cursor()
257
- cols = list(df.columns)
258
- col_str = ",".join([f"`{c}`" for c in cols])
259
- value_tpl = "(" + ",".join(["%s"] * len(cols)) + ")"
260
- values_str = ",".join([value_tpl] * len(df))
261
- update_clause = ",".join(
262
- [f"`{c}`=VALUES(`{c}`)" for c in cols if c != "id"]
263
- )
264
- sql = f"""
265
- INSERT INTO `{table_name}` ({col_str})
266
- VALUES {values_str}
267
- ON DUPLICATE KEY UPDATE {update_clause}
268
- """
269
- data = [tuple(row) for row in df.values]
270
- flat_data = [v for row in data for v in row]
271
- cursor.execute(sql, flat_data)
272
- conn.commit()
273
- print(f"成功插入/更新 {cursor.rowcount} 条记录")
274
- break
275
- except Exception as e:
276
- if conn:
277
- conn.rollback()
278
- print("入库失败:", e)
279
- if '1054, "Unknown column' in str(e):
280
- # 表结构校验,df为抓取数据表
281
- check_and_sync_columns(df,table_name,database)
282
- else:
283
- raise Exception(e)
284
- finally:
285
- if conn:
286
- conn.close()
CPpackage/db/test.py CHANGED
@@ -4,22 +4,22 @@ import os
4
4
 
5
5
  if __name__ == "__main__":
6
6
  # 配置你的参数
7
- TABLE_NAME = "liuliang_plly"
7
+ TABLE_NAME = "pinlei_sp360_xsfx"
8
8
  DATABASE_NAME = "shengyicanmou"
9
9
  # 你的唯一索引字段列表(必须是列表类型!)
10
- UNIQUE_FIELDS = ['begindate', 'value_l', 'pageName', 'itemId', 'name_l1', 'name_l2', 'date_effect', 'store_id']
10
+ UNIQUE_FIELDS = ["begindate", "value_l", "itemId", "store_id", "date_effect"]
11
11
  PORT = 3306 # 可选,默认从配置获取
12
12
 
13
13
  # 第一步:查询重复记录
14
- # dup_df = find_duplicate_records(TABLE_NAME, DATABASE_NAME, UNIQUE_FIELDS, PORT)
14
+ dup_df = find_duplicate_records(TABLE_NAME, DATABASE_NAME, UNIQUE_FIELDS, PORT)
15
15
 
16
16
  # # 第二步:确认有重复后删除
17
- # if dup_df is not None and not dup_df.empty:
17
+ if dup_df is not None and not dup_df.empty:
18
18
 
19
- delete_count = delete_duplicate_records(
20
- table_name=TABLE_NAME,
21
- database=DATABASE_NAME,
22
- unique_index_fields=UNIQUE_FIELDS, # 必须传列表!
23
- port=PORT,
24
- keep_strategy="max_id"
25
- )
19
+ delete_count = delete_duplicate_records(
20
+ table_name=TABLE_NAME,
21
+ database=DATABASE_NAME,
22
+ unique_index_fields=UNIQUE_FIELDS, # 必须传列表!
23
+ port=PORT,
24
+ keep_strategy="max_id"
25
+ )
@@ -1,10 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cppackage
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: 超品集团自用的Python包
5
5
  Home-page: https://github.com/example/CPpackage
6
6
  Author: team-数智组
7
7
  Author-email: m110135@163.com
8
+ License: UNKNOWN
9
+ Platform: UNKNOWN
8
10
  Classifier: Development Status :: 3 - Alpha
9
11
  Classifier: Intended Audience :: Developers
10
12
  Classifier: License :: OSI Approved :: MIT License
@@ -16,7 +18,6 @@ Classifier: Programming Language :: Python :: 3.11
16
18
  Classifier: Programming Language :: Python :: 3.12
17
19
  Requires-Python: >=3.8
18
20
  Description-Content-Type: text/markdown
19
- License-File: LICENSE
20
21
  Requires-Dist: pymysql
21
22
  Requires-Dist: pandas
22
23
  Requires-Dist: numpy
@@ -70,3 +71,5 @@ CPpackage/
70
71
  ## 许可证
71
72
 
72
73
  本项目采用MIT许可证。详情请参阅LICENSE文件。
74
+
75
+
@@ -0,0 +1,13 @@
1
+ CPpackage/__init__.py,sha256=KpVaXkIzkNDl-dfSX8Rr7z9XFlipxx1nZ_GzS4Qls4I,173
2
+ CPpackage/core/__init__.py,sha256=92mF0310uQ5ujlgd-LCMs5kZVzfGRdcl67GHIRWhHcA,504
3
+ CPpackage/db/__init__.py,sha256=WhrLqsjq97KKHF7YV-HcZf9pRtHuqKhRjXDNPCBVbjY,107
4
+ CPpackage/db/config.py,sha256=A9HFCXqlrvssHLD1Ys9HpjDckavalFj31X1Np_aDNp4,729
5
+ CPpackage/db/sql_model.py,sha256=suwQMXf_hpfYfZR07SadIa2E_y_mbEKKy-6BuZCwWE4,13002
6
+ CPpackage/db/test.py,sha256=1EfTt53H9HLIGK6U2SAF6lIDgz2LItJ9tNHpxojz9ro,939
7
+ CPpackage/utils/__init__.py,sha256=sAespT0aoLr3O0WF27DwHLYF60VZgPA1tiT0JJ8s8zI,264
8
+ cppackage-0.3.2.dist-info/LICENSE,sha256=6Pj597LnvlRtq3Vgjkfp6zgBeBh7wl8jKE-nCFg4vZI,1065
9
+ cppackage-0.3.2.dist-info/METADATA,sha256=KFc2Lr9sQj2vjZaiVRAHKd65oMROmyAUBxSbiQDv5Ug,2305
10
+ cppackage-0.3.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
11
+ cppackage-0.3.2.dist-info/entry_points.txt,sha256=6MmsHuaQY5w0GxhHAx3lQfHTaocmueEf1OGsoM-hMrQ,51
12
+ cppackage-0.3.2.dist-info/top_level.txt,sha256=FQrfWDbJistWQIrHW_Aoxoy_UWSzjOByWVcVS5ig9Tk,10
13
+ cppackage-0.3.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.46.3)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  cppackage = CPpackage.core:main
3
+
@@ -1,13 +0,0 @@
1
- CPpackage/__init__.py,sha256=KpVaXkIzkNDl-dfSX8Rr7z9XFlipxx1nZ_GzS4Qls4I,173
2
- CPpackage/core/__init__.py,sha256=92mF0310uQ5ujlgd-LCMs5kZVzfGRdcl67GHIRWhHcA,504
3
- CPpackage/db/__init__.py,sha256=WhrLqsjq97KKHF7YV-HcZf9pRtHuqKhRjXDNPCBVbjY,107
4
- CPpackage/db/config.py,sha256=A9HFCXqlrvssHLD1Ys9HpjDckavalFj31X1Np_aDNp4,729
5
- CPpackage/db/sql_model.py,sha256=uoqyNTM7tDVh9rt-2AQ8r6CyXf1DtsbBcjLd9tMq4oY,9948
6
- CPpackage/db/test.py,sha256=qi6dM_ITxFb87CugaQuUdeZi1tJHSX8euoXiZyX242s,945
7
- CPpackage/utils/__init__.py,sha256=sAespT0aoLr3O0WF27DwHLYF60VZgPA1tiT0JJ8s8zI,264
8
- cppackage-0.3.0.dist-info/LICENSE,sha256=6Pj597LnvlRtq3Vgjkfp6zgBeBh7wl8jKE-nCFg4vZI,1065
9
- cppackage-0.3.0.dist-info/METADATA,sha256=kOyAMCsz-eOUeAo6SgkimCJ1kN8_gzD6-Qg_PEZhFQg,2287
10
- cppackage-0.3.0.dist-info/WHEEL,sha256=hPN0AlP2dZM_3ZJZWP4WooepkmU9wzjGgCLCeFjkHLA,92
11
- cppackage-0.3.0.dist-info/entry_points.txt,sha256=SAzF2klkNyeNCqXuiNkDirN999l7v8CGCrHR0StD_oo,50
12
- cppackage-0.3.0.dist-info/top_level.txt,sha256=FQrfWDbJistWQIrHW_Aoxoy_UWSzjOByWVcVS5ig9Tk,10
13
- cppackage-0.3.0.dist-info/RECORD,,