mdbq 2.9.6__py3-none-any.whl → 2.9.8__py3-none-any.whl

This diff shows the changes between the two publicly released package versions as they appear in their public registry; it is provided for informational purposes only.
mdbq/dataframe/converter.py CHANGED
@@ -71,8 +71,6 @@ class DataFrameConverter(object):
  pass
  if df[col].dtype == 'float' or df[col].dtype == 'float64':  # for float types, keep 6 decimal places
  df[col] = df[col].fillna(0.0).apply(lambda x: round(x, 6))
- # df[col] = df[col].fillna(0.0).apply(lambda x: "{:.6f}".format(x))
- # df[col] = df[col].apply('float64')

  # convert date-like columns to datetime
  value = df.loc[0, col]
@@ -84,7 +82,9 @@ class DataFrameConverter(object):
  df[col] = df[col].apply(lambda x: pd.to_datetime(x))
  except:
  pass
- new_col = re.sub(r'[()()-,,$&~^、* ]', '_', col.lower())
+ new_col = col.lower()
+ new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
+ new_col = new_col.replace(')', '')
  new_col = re.sub(r'_{2,}', '_', new_col)
  new_col = re.sub(r'_+$', '', new_col)
  df.rename(columns={col: new_col}, inplace=True)
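The rewritten sanitizer lowercases the name first, replaces a much wider set of ASCII and full-width punctuation with underscores, then collapses and trims the underscores. One caveat: re.sub's fourth positional parameter is count, not flags, so passing re.IGNORECASE (integer value 2) there caps the substitutions at two per column name instead of enabling case-insensitive matching; the same call appears again in mysql.py below. A minimal standalone sketch of the intended behavior, with that argument dropped (the character class contains no letters, so case sensitivity is moot anyway):

import re

def normalize_col(col: str) -> str:
    # Lowercase, replace ASCII/full-width punctuation with '_', then tidy up.
    new_col = col.lower()
    new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col)
    new_col = new_col.replace(')', '')        # stray full-width close-paren
    new_col = re.sub(r'_{2,}', '_', new_col)  # collapse repeated underscores
    new_col = re.sub(r'_+$', '', new_col)     # trim trailing underscores
    return new_col

print(normalize_col('销售额(元) Total'))  # -> '销售额_元_total'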
mdbq/mysql/mysql.py CHANGED
@@ -376,17 +376,17 @@ class MysqlUpload:
  cols = df.columns.tolist()
  for col in cols:
  df[col] = df[col].apply(lambda x: float(re.sub(r'%$', '', str(x))) / 100 if (
- str(x) != '' and str(x).endswith('%')) else '0.0' if str(x) == '0%' else x)
+ str(x) != '' and str(x).endswith('%')) and not re.findall('[\\u4e00-\\u9fa5]', str(x)) else '0.0' if str(x) == '0%' else x)
  try:
  df[col] = df[col].apply(
  lambda x: int(x) if '_' not in str(x) and '.' not in str(x) else x)  # try int conversion when there is no decimal point
  except:
  pass
- if df[col].dtype == 'object':
- try:
+ try:
+ if df[col].dtype == 'object':  # some columns are not type-inferred by pandas and may lack a dtype attribute
  df[col] = df[col].apply(lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
- except:
- pass
+ except:
+ pass
  new_col = col.lower()
  new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
  new_col = new_col.replace(')', '')
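The percent-conversion change adds a guard so that values containing CJK characters are left untouched rather than being coerced to floats. A simplified, standalone sketch of the new behavior (the separate '0%' branch collapses into the same conversion path):

import re

def percent_to_float(x):
    s = str(x)
    # Convert plain percent strings only; skip empty values and anything containing CJK text.
    if s != '' and s.endswith('%') and not re.findall('[\u4e00-\u9fa5]', s):
        return float(re.sub(r'%$', '', s)) / 100
    return x

print(percent_to_float('12.5%'))      # 0.125
print(percent_to_float('同比下降3%'))  # returned unchanged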
@@ -432,7 +432,7 @@ class MysqlUpload:
  __res_dict.update({k: 'varchar(255)'})
  return __res_dict, df

- @try_except
+ # @try_except
  def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False, drop_duplicates=False,
  filename=None, count=None, reset_id=False):
  """
@@ -532,18 +532,19 @@ class MysqlUpload:
  chunksize=1000
  )
  if reset_id:
- # 6. reset the auto-increment column
- try:
- cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
- result = cursor.fetchone()
- if result:
- cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
- cursor.execute(
- f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
- cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
- except Exception as e:
- print(f'{e}')
- connection.rollback()
+ pass
+ # # 6. reset the auto-increment column
+ # try:
+ # cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+ # result = cursor.fetchone()
+ # if result:
+ # cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
+ # cursor.execute(
+ # f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+ # cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
+ # except Exception as e:
+ # print(f'{e}')
+ # connection.rollback()
  connection.commit()  # commit the transaction
  connection.close()
  return
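The disabled block rebuilt the id column after each upload: check for an existing id column, drop it, re-add it as an AUTO_INCREMENT primary key, and restart the counter at 1. The same logic, pulled out as a standalone helper for reference; note that ALTER TABLE is DDL and triggers an implicit commit in MySQL, so the rollback() in the original error path could not undo a partially applied change:

def reset_auto_increment(cursor, table_name):
    # Reproduces the behavior of the commented-out block above.
    cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
    if cursor.fetchone():
        cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
        cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
        cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1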
@@ -570,17 +571,18 @@ class MysqlUpload:
  )
  # 6. reset the auto-increment column
  if reset_id:
- try:
- cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
- result = cursor.fetchone()
- if result:
- cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
- cursor.execute(
- f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
- cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
- except Exception as e:
- print(f'{e}')
- connection.rollback()
+ pass
+ # try:
+ # cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+ # result = cursor.fetchone()
+ # if result:
+ # cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
+ # cursor.execute(
+ # f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+ # cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
+ # except Exception as e:
+ # print(f'{e}')
+ # connection.rollback()
  connection.close()
  return

@@ -589,6 +591,9 @@ class MysqlUpload:
  # data is the incoming data to process, not rows from the database
  # data example: {'日期': Timestamp('2024-08-27 00:00:00'), '推广费余额': 33299, '品销宝余额': 2930.73, '短信剩余': 67471}
  try:
+ cols = ', '.join(f"`{item}`" for item in data.keys())  # column names need escaping
+ # data.update({item: f"{data[item]}" for item in data.keys()})  # cast all values to str; not required
+ values = ', '.join([f'"{item}"' for item in data.values()])  # values must be quoted
  condition = []
  for k, v in data.items():
  condition += [f'`{k}` = "{v}"']
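The added lines assemble the column list and the value list by string formatting, with backticks around identifiers and double quotes around values. Identifiers cannot be bound as query parameters, so the backtick escaping is needed; the values themselves, however, could be passed through DB-API parameter binding, which avoids manual quoting and escaping. A sketch of that alternative, assuming a pymysql-style cursor (%s placeholders):

def insert_dict(cursor, table_name, data):
    # Escape identifiers with backticks, but bind the values instead of quoting them by hand.
    cols = ', '.join(f'`{k}`' for k in data.keys())
    placeholders = ', '.join(['%s'] * len(data))
    sql = f'INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders});'
    cursor.execute(sql, tuple(data.values()))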
@@ -665,23 +670,24 @@ class MysqlUpload:
  sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
  cursor.execute(sql)
  except Exception as e:
+ pass
  # print(data)
  # print(values)
- print(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
+ # print(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
  # breakpoint()

- # 6. reset the auto-increment column
- try:
- cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
- result = cursor.fetchone()
- if result:
- cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
- cursor.execute(
- f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
- cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
- except Exception as e:
- print(f'{e}')
- connection.rollback()
+ # # 6. reset the auto-increment column
+ # try:
+ # cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+ # result = cursor.fetchone()
+ # if result:
+ # cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
+ # cursor.execute(
+ # f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+ # cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
+ # except Exception as e:
+ # print(f'{table_name}, -> {e}')
+ # connection.rollback()
  connection.commit()  # commit the transaction
  connection.close()

@@ -911,18 +917,18 @@ class OptimizeDatas:
  else:  # case where no date column exists
  self.delete_duplicate2(table_name=table_name)

- # 5. reset the auto-increment (id) column
- try:
- cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
- result = cursor.fetchone()
- if result:
- cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
- cursor.execute(
- f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
- cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
- except Exception as e:
- print(f'{e}')
- self.connection.rollback()
+ # # 5. reset the auto-increment (id) column
+ # try:
+ # cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+ # result = cursor.fetchone()
+ # if result:
+ # cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # drop the id column
+ # cursor.execute(
+ # f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+ # cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # restart auto-increment at 1
+ # except Exception as e:
+ # print(f'{e}')
+ # self.connection.rollback()
  self.connection.close()
  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
  print(f'{now}mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
mdbq/mysql/s_query.py CHANGED
@@ -92,6 +92,7 @@ class QueryDatas:

  if len(df) == 0:
  print(f'database: {db_name}, table: {table_name} 查询的数据为空')
+ return pd.DataFrame()
  cv = converter.DataFrameConverter()
  df = cv.convert_df_cols(df)
  return df
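Returning an empty DataFrame on an empty result gives callers a single, consistent return type: the column-conversion step is skipped for empty results, and callers can test df.empty instead of guarding against None or an unconverted frame. A small illustration of the calling pattern this enables (toy function, not the package API):

import pandas as pd

def fetch(rows, columns):
    # Same guard as above: hand back an empty frame rather than None.
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows, columns=columns)

df = fetch([], ['日期', '销售额'])
print(df.empty)       # True
print(len(df.index))  # 0 -- downstream len()/iteration still works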
mdbq/spider/aikucun.py CHANGED
@@ -20,6 +20,9 @@ from selenium.webdriver.common.keys import Keys
  from mdbq.aggregation import aggregation
  from mdbq.clean import data_clean
  from mdbq.other import ua_sj
+ from mdbq.mysql import mysql
+ from mdbq.config import myconfig
+ import socket

  warnings.filterwarnings('ignore')

@@ -44,6 +47,23 @@ else:
  Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表'))  # shared-files root directory
  Source_Path = str(pathlib.Path(Data_Path, '原始文件2'))
  upload_path = os.path.join(D_PATH, '数据上传中心')  # this directory is inside the downloads folder
+ if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
+ conf = myconfig.main()
+ conf_data = conf['Windows']['company']['mysql']['local']
+ username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data[
+ 'port']
+ else:
+ conf = myconfig.main()
+ conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+ username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data[
+ 'port']
+ m_engine = mysql.MysqlUpload(
+ username=username,
+ password=password,
+ host=host,
+ port=port,
+ charset='utf8mb4'
+ )


  def get_cookie_aikucun():
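The module-level block above selects MySQL credentials by hostname and builds a shared MysqlUpload engine at import time. A compact restatement of the same selection, assuming the conf layout shown in the diff:

import socket
from mdbq.config import myconfig
from mdbq.mysql import mysql

conf = myconfig.main()
node = 'company' if socket.gethostname() in ('company', 'Mac2.local') else 'xigua_lx'
conf_data = conf['Windows'][node]['mysql']['local']  # same nesting as the block above
m_engine = mysql.MysqlUpload(
    username=conf_data['username'],
    password=conf_data['password'],
    host=conf_data['host'],
    port=conf_data['port'],
    charset='utf8mb4',
)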
@@ -262,6 +282,12 @@ class AikuCun:
  _driver.quit()

  def clean_data(self, date):
+ set_typ = {
+ '店铺名称': 'varchar(100)',
+ 'spu_id': 'varchar(100)',
+ '图片': 'varchar(255)',
+ '数据更新时间': 'timestamp',
+ }
  for root, dirs, files in os.walk(upload_path, topdown=False):
  for name in files:
  if '~$' in name or 'DS_Store' in name:
@@ -285,6 +311,21 @@ class AikuCun:
  # df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
  # print(df['数据更新时间'])
  # breakpoint()
+
+ m_engine.df_to_mysql(
+ df=df,
+ db_name='爱库存2',
+ table_name='商品spu榜单',
+ icm_update=[],  # incremental update; used for aggregated data, do not use elsewhere
+ move_insert=False,  # delete first, then insert
+ df_sql=True,  # when True, upload the whole table via df.to_sql; no deduplication
+ drop_duplicates=False,  # when True, check for duplicates before inserting (slower); otherwise upload directly
+ count=None,
+ filename=None,  # used to track processing progress
+ reset_id=False,  # whether to reset the auto-increment column
+ set_typ=set_typ,
+ )
+
  new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
  df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
  os.remove(os.path.join(root, name))
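For reference, a minimal end-to-end call of the upload path added above, using a toy frame with hypothetical shop/SPU values; the engine and parameter meanings come from the module-level block and the inline comments earlier in this diff:

import pandas as pd

df = pd.DataFrame({
    '店铺名称': ['示例店铺'],          # hypothetical sample values
    'spu_id': ['SPU0001'],
    '图片': ['https://example.com/spu0001.jpg'],
})
m_engine.df_to_mysql(
    df=df,
    db_name='爱库存2',
    table_name='商品spu榜单',
    df_sql=True,            # whole-frame upload via df.to_sql, no deduplication
    drop_duplicates=False,
    reset_id=False,
    set_typ={'店铺名称': 'varchar(100)', 'spu_id': 'varchar(100)', '图片': 'varchar(255)'},
)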
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 2.9.6
+ Version: 2.9.8
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -1,11 +1,12 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/aggregation.py,sha256=3d_sx-cFrW-c03D5Ry9jf144Ph3d0znIl3IHmnInsYA,73902
+ mdbq/aggregation/aggregation.py,sha256=2KCVXZygQt4xVxGbFcDMBpL3PukY4yQF_uI-qLSTWaU,73460
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
  mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
  mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
- mdbq/aggregation/query_data.py,sha256=0kiJQv7xLeH7kXxPmMiUUPYIlt5gcEyzSETmJTV372U,103891
+ mdbq/aggregation/query_data.py,sha256=e6vb4hPYZL5KWE6O-MtDoY13GWhx5YMDvTyD3rdgy3c,111441
+ mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -20,15 +21,15 @@ mdbq/config/products.py,sha256=ykvoQiA4OvFEYQ35wmCkREECdz0xIJzIs-Xix9mFpYI,6295
  mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
  mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
  mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
- mdbq/dataframe/converter.py,sha256=s-5EzHU9HlcSHlKgRd3autGODaKn523av1N0gh_56YY,4614
+ mdbq/dataframe/converter.py,sha256=doWRcFMqf0_RQz7w5BxRNB8JeLetFSsNld43GWlhXXc,4600
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
  mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/mysql.py,sha256=sfETcLkxeOjDn-B6-ShHRPrq8BUJA9Qmtj4zyIIvi3w,59499
+ mdbq/mysql/mysql.py,sha256=_geeu7LP-Ur76kr1ka7_jdifnwrnJJjWnUBzlPXOQOQ,60119
  mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
- mdbq/mysql/s_query.py,sha256=bgNNIqYLDCHjD5KTFcm6x4u74selpAGs5ouJYuqX86k,8447
+ mdbq/mysql/s_query.py,sha256=fnXncwSmA7CB0ELn1a-YxYZDrYkC2Bcgnj2J4dcQ8X8,8481
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
@@ -42,8 +43,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
  mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq/spider/aikucun.py,sha256=jHrdGWBJQaSywx7V-U4YuM6vWkwC5SR5tTOOdB3YU_c,17306
- mdbq-2.9.6.dist-info/METADATA,sha256=rYMAeGz0dY4meetEVC4l49GVUVRf3E9kzMqoLAn0GW4,243
- mdbq-2.9.6.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- mdbq-2.9.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-2.9.6.dist-info/RECORD,,
+ mdbq/spider/aikucun.py,sha256=01qJo_Di5Kmi2lG5_HKb0OI283b1-Pgqh-nnA0pX4TY,19038
+ mdbq-2.9.8.dist-info/METADATA,sha256=-YqbseryNUHGqmaRj1Brh9FejzA62uUxgav_hmn14CQ,243
+ mdbq-2.9.8.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+ mdbq-2.9.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-2.9.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.44.0)
+ Generator: setuptools (70.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
