cppackage 0.2.9__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,6 @@
1
1
  import pymysql
2
2
  import time
3
3
  from pymysql import Error
4
- import pandas as pd
5
- # 条件导入:支持直接运行和包模式
6
4
  try:
7
5
  from .config import get_db_config
8
6
  except (ImportError, ValueError):
@@ -161,6 +159,13 @@ def find_duplicate_records(table_name, database, unique_index_fields, port=None)
161
159
  """
162
160
 
163
161
  try:
162
+ # Lazy import pandas here to avoid heavy imports during module import
163
+ try:
164
+ import pandas as pd
165
+ except Exception as e:
166
+ print("无法导入 pandas:", e)
167
+ return None
168
+
164
169
  with _get_connection(database, port) as conn:
165
170
  df_duplicates = pd.read_sql(sql, conn)
166
171
  if df_duplicates.empty:
@@ -168,8 +173,8 @@ def find_duplicate_records(table_name, database, unique_index_fields, port=None)
168
173
  else:
169
174
  print(f"发现 {len(df_duplicates)} 条重复记录({len(df_duplicates.drop_duplicates(subset=unique_index_fields))} 组重复组合)")
170
175
  # 可选:保存重复记录到CSV
171
- df_duplicates.to_csv(f"duplicate_records_{table_name}.csv", index=False, encoding="utf-8-sig")
172
- print("重复记录已保存到 duplicate_records_{table_name}.csv")
176
+ # df_duplicates.to_csv(f"duplicate_records_{table_name}.csv", index=False, encoding="utf-8-sig")
177
+ # print("重复记录已保存到 duplicate_records_{table_name}.csv")
173
178
  return df_duplicates
174
179
  except pymysql.MySQLError as e:
175
180
  print("数据库查询失败:", e)
@@ -178,7 +183,7 @@ def find_duplicate_records(table_name, database, unique_index_fields, port=None)
178
183
  print("未知错误:", e)
179
184
  return None
180
185
  # ===================== 删除重复记录函数(修复参数+调用逻辑) =====================
181
- def delete_duplicate_records(table_name, database, unique_index_fields, port=None, keep_strategy="min_id"):
186
+ def delete_duplicate_records(table_name, database, unique_index_fields, port=None, keep_strategy="max_id"):
182
187
  """
183
188
  删除重复记录,仅保留每组唯一值的一条记录
184
189
  参数:
@@ -190,10 +195,6 @@ def delete_duplicate_records(table_name, database, unique_index_fields, port=Non
190
195
  返回:
191
196
  int: 删除的记录数;None: 出错;0: 无重复
192
197
  """
193
- # 1. 先调用修复后的find_duplicate_records查询重复数据
194
- duplicate_df = find_duplicate_records(table_name, database, unique_index_fields, port)
195
- if duplicate_df is None or duplicate_df.empty:
196
- return 0
197
198
 
198
199
  # 2. 构建删除SQL(核心逻辑)
199
200
  group_by_str = ",".join([f"`{field}`" for field in unique_index_fields])
@@ -248,6 +249,9 @@ def update_datas(df, table_name, database):
248
249
  conn = None
249
250
  for i in range(2):
250
251
  try:
252
+ if df.empty:
253
+ print("数据为空,无需入库")
254
+ return
251
255
  conn = _get_connection(database)
252
256
  cursor = conn.cursor()
253
257
  cols = list(df.columns)
@@ -262,7 +266,6 @@ def update_datas(df, table_name, database):
262
266
  VALUES {values_str}
263
267
  ON DUPLICATE KEY UPDATE {update_clause}
264
268
  """
265
- print(sql)
266
269
  data = [tuple(row) for row in df.values]
267
270
  flat_data = [v for row in data for v in row]
268
271
  cursor.execute(sql, flat_data)
@@ -281,5 +284,3 @@ def update_datas(df, table_name, database):
281
284
  finally:
282
285
  if conn:
283
286
  conn.close()
284
-
285
-
@@ -0,0 +1,25 @@
1
+ from CPpackage.db.sql_model import find_duplicate_records,delete_duplicate_records
2
+ import os
3
+
4
+
5
+ if __name__ == "__main__":
6
+ # 配置你的参数
7
+ TABLE_NAME = "liuliang_plly"
8
+ DATABASE_NAME = "shengyicanmou"
9
+ # 你的唯一索引字段列表(必须是列表类型!)
10
+ UNIQUE_FIELDS = ['begindate', 'value_l', 'pageName', 'itemId', 'name_l1', 'name_l2', 'date_effect', 'store_id']
11
+ PORT = 3306 # 可选,默认从配置获取
12
+
13
+ # 第一步:查询重复记录
14
+ # dup_df = find_duplicate_records(TABLE_NAME, DATABASE_NAME, UNIQUE_FIELDS, PORT)
15
+
16
+ # # 第二步:确认有重复后删除
17
+ # if dup_df is not None and not dup_df.empty:
18
+
19
+ delete_count = delete_duplicate_records(
20
+ table_name=TABLE_NAME,
21
+ database=DATABASE_NAME,
22
+ unique_index_fields=UNIQUE_FIELDS, # 必须传列表!
23
+ port=PORT,
24
+ keep_strategy="max_id"
25
+ )
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.1
2
2
  Name: cppackage
3
- Version: 0.2.9
3
+ Version: 0.3.0
4
4
  Summary: 超品集团自用的Python包
5
5
  Home-page: https://github.com/example/CPpackage
6
6
  Author: team-数智组
@@ -17,19 +17,6 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: pymysql
21
- Requires-Dist: pandas
22
- Requires-Dist: numpy
23
- Dynamic: author
24
- Dynamic: author-email
25
- Dynamic: classifier
26
- Dynamic: description
27
- Dynamic: description-content-type
28
- Dynamic: home-page
29
- Dynamic: license-file
30
- Dynamic: requires-dist
31
- Dynamic: requires-python
32
- Dynamic: summary
33
20
 
34
21
  # CPpackage
35
22
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.1
2
2
  Name: cppackage
3
- Version: 0.2.9
3
+ Version: 0.3.0
4
4
  Summary: 超品集团自用的Python包
5
5
  Home-page: https://github.com/example/CPpackage
6
6
  Author: team-数智组
@@ -17,19 +17,6 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: pymysql
21
- Requires-Dist: pandas
22
- Requires-Dist: numpy
23
- Dynamic: author
24
- Dynamic: author-email
25
- Dynamic: classifier
26
- Dynamic: description
27
- Dynamic: description-content-type
28
- Dynamic: home-page
29
- Dynamic: license-file
30
- Dynamic: requires-dist
31
- Dynamic: requires-python
32
- Dynamic: summary
33
20
 
34
21
  # CPpackage
35
22
 
@@ -10,7 +10,7 @@ with open('readme.md', 'r', encoding='utf-8') as f:
10
10
  # 包的基本信息
11
11
  setup(
12
12
  name='cppackage',
13
- version='0.2.9',
13
+ version='0.3.0',
14
14
  description='超品集团自用的Python包',
15
15
  long_description=long_description,
16
16
  long_description_content_type='text/markdown',
@@ -1,24 +0,0 @@
1
- from CPpackage.db.sql_model import find_duplicate_records,delete_duplicate_records
2
- import os
3
-
4
-
5
- if __name__ == "__main__":
6
- # 配置你的参数
7
- TABLE_NAME = "pinlei_hgjk" # 替换为实际表名
8
- DATABASE_NAME = "shengyicanmou_copy" # 替换为实际数据库名
9
- # 你的唯一索引字段列表(必须是列表类型!)
10
- UNIQUE_FIELDS = ['begindate', 'enddate', 'value_l', 'sellerId', 'statDate', 'date_effect', 'store_id']
11
- PORT = 3306 # 可选,默认从配置获取
12
-
13
- # 第一步:查询重复记录
14
- dup_df = find_duplicate_records(TABLE_NAME, DATABASE_NAME, UNIQUE_FIELDS, PORT)
15
-
16
- # 第二步:确认有重复后删除
17
- if dup_df is not None and not dup_df.empty:
18
- delete_count = delete_duplicate_records(
19
- table_name=TABLE_NAME,
20
- database=DATABASE_NAME,
21
- unique_index_fields=UNIQUE_FIELDS, # 必须传列表!
22
- port=PORT,
23
- keep_strategy="min_id"
24
- )
File without changes
File without changes
File without changes
File without changes