mdbq 3.8.11__py3-none-any.whl → 3.8.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +68 -73
- mdbq/mysql/mysql.py +34 -187
- mdbq/spider/aikucun.py +2 -2
- {mdbq-3.8.11.dist-info → mdbq-3.8.13.dist-info}/METADATA +1 -1
- {mdbq-3.8.11.dist-info → mdbq-3.8.13.dist-info}/RECORD +8 -8
- {mdbq-3.8.11.dist-info → mdbq-3.8.13.dist-info}/WHEEL +0 -0
- {mdbq-3.8.11.dist-info → mdbq-3.8.13.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.8.11'
|
1
|
+
VERSION = '3.8.13'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -147,12 +147,12 @@ class MysqlDatasQuery:
|
|
147
147
|
'直接成交笔数': 'int',
|
148
148
|
'直接成交金额': 'decimal(12,2)',
|
149
149
|
}
|
150
|
-
self.pf_datas.append(
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
) # 制作其他聚合表
|
150
|
+
# self.pf_datas.append(
|
151
|
+
# {
|
152
|
+
# '集合名称': table_name,
|
153
|
+
# '数据主体': df[['日期', '店铺名称', '商品id', '花费', '成交金额', '直接成交金额']]
|
154
|
+
# }
|
155
|
+
# ) # 制作其他聚合表
|
156
156
|
self.pf_datas.append(
|
157
157
|
{
|
158
158
|
'集合名称': '天猫汇总表调用',
|
@@ -633,12 +633,12 @@ class MysqlDatasQuery:
|
|
633
633
|
'件均价': 'mediumint',
|
634
634
|
'价格带': 'varchar(100)',
|
635
635
|
}
|
636
|
-
self.pf_datas.append(
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
) # 制作其他聚合表
|
636
|
+
# self.pf_datas.append(
|
637
|
+
# {
|
638
|
+
# '集合名称': table_name,
|
639
|
+
# '数据主体': df[['日期', '店铺名称', '宝贝id', '销售额', '销售量', '退款额_发货后', '退货量_发货后']]
|
640
|
+
# }
|
641
|
+
# ) # 制作其他聚合表
|
642
642
|
if not self.update_service:
|
643
643
|
return
|
644
644
|
min_date = df['日期'].min()
|
@@ -1333,12 +1333,12 @@ class MysqlDatasQuery:
|
|
1333
1333
|
'二级类目': 'varchar(100)',
|
1334
1334
|
'三级类目': 'varchar(100)',
|
1335
1335
|
}
|
1336
|
-
self.pf_datas.append(
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
) # 制作其他聚合表
|
1336
|
+
# self.pf_datas.append(
|
1337
|
+
# {
|
1338
|
+
# '集合名称': table_name,
|
1339
|
+
# '数据主体': df[['宝贝id', '商家编码']]
|
1340
|
+
# }
|
1341
|
+
# ) # 制作其他聚合表
|
1342
1342
|
if not self.update_service:
|
1343
1343
|
return
|
1344
1344
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
@@ -1390,12 +1390,12 @@ class MysqlDatasQuery:
|
|
1390
1390
|
'三级类目': 'varchar(100)',
|
1391
1391
|
'更新时间': 'timestamp'
|
1392
1392
|
}
|
1393
|
-
self.pf_datas.append(
|
1394
|
-
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
) # 制作其他聚合表
|
1393
|
+
# self.pf_datas.append(
|
1394
|
+
# {
|
1395
|
+
# '集合名称': table_name,
|
1396
|
+
# '数据主体': df[['宝贝id', '商家编码']]
|
1397
|
+
# }
|
1398
|
+
# ) # 制作其他聚合表
|
1399
1399
|
if not self.update_service:
|
1400
1400
|
return
|
1401
1401
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
@@ -1447,12 +1447,12 @@ class MysqlDatasQuery:
|
|
1447
1447
|
'日期': 'date',
|
1448
1448
|
'商品链接': 'varchar(255)',
|
1449
1449
|
}
|
1450
|
-
self.pf_datas.append(
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1454
|
-
|
1455
|
-
) # 制作其他聚合表
|
1450
|
+
# self.pf_datas.append(
|
1451
|
+
# {
|
1452
|
+
# '集合名称': table_name,
|
1453
|
+
# '数据主体': df[['商品id', '商品图片']]
|
1454
|
+
# }
|
1455
|
+
# ) # 制作其他聚合表
|
1456
1456
|
if not self.update_service:
|
1457
1457
|
return
|
1458
1458
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
@@ -1505,13 +1505,13 @@ class MysqlDatasQuery:
|
|
1505
1505
|
'sku地址': 'varchar(255)',
|
1506
1506
|
'更新时间': 'timestamp'
|
1507
1507
|
}
|
1508
|
-
# 制作其他聚合表
|
1509
|
-
self.pf_datas.append(
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
)
|
1508
|
+
# # 制作其他聚合表
|
1509
|
+
# self.pf_datas.append(
|
1510
|
+
# {
|
1511
|
+
# '集合名称': table_name,
|
1512
|
+
# '数据主体': df[['商品id', '商品图片']]
|
1513
|
+
# }
|
1514
|
+
# )
|
1515
1515
|
if not self.update_service: # 调试加,是否继续执行下面的入库操作
|
1516
1516
|
return
|
1517
1517
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
@@ -1852,12 +1852,12 @@ class MysqlDatasQuery:
|
|
1852
1852
|
'天猫页面价': 'decimal(10,2)',
|
1853
1853
|
'天猫中促价': 'decimal(10,2)',
|
1854
1854
|
}
|
1855
|
-
self.pf_datas.append(
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
) # 制作其他聚合表
|
1855
|
+
# self.pf_datas.append(
|
1856
|
+
# {
|
1857
|
+
# '集合名称': table_name,
|
1858
|
+
# '数据主体': df[['款号', '成本价']]
|
1859
|
+
# }
|
1860
|
+
# ) # 制作其他聚合表
|
1861
1861
|
if not self.update_service:
|
1862
1862
|
return
|
1863
1863
|
min_date = pd.to_datetime(df['日期'].min()).strftime('%Y-%m-%d')
|
@@ -1951,12 +1951,12 @@ class MysqlDatasQuery:
|
|
1951
1951
|
p = df.pop('spu_id')
|
1952
1952
|
df.insert(loc=3, column='spu_id', value=p)
|
1953
1953
|
|
1954
|
-
self.pf_datas_jd.append(
|
1955
|
-
|
1956
|
-
|
1957
|
-
|
1958
|
-
|
1959
|
-
) # 制作其他聚合表
|
1954
|
+
# self.pf_datas_jd.append(
|
1955
|
+
# {
|
1956
|
+
# '集合名称': table_name,
|
1957
|
+
# '数据主体': df[['日期', '产品线', '触发sku_id', '跟单sku_id', '花费']]
|
1958
|
+
# }
|
1959
|
+
# ) # 制作其他聚合表
|
1960
1960
|
if not self.update_service:
|
1961
1961
|
return
|
1962
1962
|
set_typ = {
|
@@ -2204,12 +2204,12 @@ class MysqlDatasQuery:
|
|
2204
2204
|
idx = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'])['更新时间'].idxmax()
|
2205
2205
|
df = df.loc[idx]
|
2206
2206
|
df = df[['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数', '成交单量', '成交金额']]
|
2207
|
-
self.pf_datas_jd.append(
|
2208
|
-
|
2209
|
-
|
2210
|
-
|
2211
|
-
|
2212
|
-
) # 制作其他聚合表
|
2207
|
+
# self.pf_datas_jd.append(
|
2208
|
+
# {
|
2209
|
+
# '集合名称': table_name,
|
2210
|
+
# '数据主体': df
|
2211
|
+
# }
|
2212
|
+
# ) # 制作其他聚合表
|
2213
2213
|
if not self.update_service:
|
2214
2214
|
return
|
2215
2215
|
set_typ = {
|
@@ -3758,15 +3758,6 @@ class MysqlDatasQuery:
|
|
3758
3758
|
'直接成交金额': 'float64',
|
3759
3759
|
'自然流量曝光量': 'int64',
|
3760
3760
|
}, errors='raise')
|
3761
|
-
# tg = tg.groupby(['日期', '推广渠道', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
|
3762
|
-
# **{'加购量': ('加购量', np.max),
|
3763
|
-
# '成交笔数': ('成交笔数', np.max),
|
3764
|
-
# '成交金额': ('成交金额', np.max),
|
3765
|
-
# '自然流量曝光量': ('自然流量曝光量', np.max),
|
3766
|
-
# '直接成交笔数': ('直接成交笔数', np.max),
|
3767
|
-
# '直接成交金额': ('直接成交金额', np.max)
|
3768
|
-
# }
|
3769
|
-
# )
|
3770
3761
|
|
3771
3762
|
df = pd.concat([tg, zb, pxb], axis=0, ignore_index=True)
|
3772
3763
|
df.fillna(0, inplace=True) # concat 之后要填充空值
|
@@ -3994,25 +3985,29 @@ def query1(months=1, less_dict=[]):
|
|
3994
3985
|
sdq.months = months # 设置数据周期, 1 表示近 2 个月
|
3995
3986
|
sdq.update_service = True # 调试时加,true: 将数据写入 mysql 服务器
|
3996
3987
|
|
3997
|
-
|
3998
|
-
sdq._sj_wxt(db_name='聚合数据', table_name='圣积天猫店_主体报表')
|
3999
|
-
sdq._tb_wxt(db_name='聚合数据', table_name='淘宝_主体报表')
|
3988
|
+
# 依赖表 -- >>
|
4000
3989
|
sdq.tg_wxt(db_name='聚合数据', table_name='天猫_主体报表')
|
3990
|
+
sdq.tg_cjzb(db_name='聚合数据', table_name='天猫_超级直播')
|
3991
|
+
sdq.pxb_zh(db_name='聚合数据', table_name='天猫_品销宝账户报表')
|
3992
|
+
# 依赖表 << --
|
3993
|
+
|
4001
3994
|
sdq.syj(db_name='聚合数据', table_name='生意经_宝贝指标')
|
4002
3995
|
sdq.idbm(db_name='聚合数据', table_name='商品id编码表')
|
4003
3996
|
sdq.sp_picture(db_name='聚合数据', table_name='商品id图片对照表')
|
4004
3997
|
sdq.sp_cost(db_name='聚合数据', table_name='商品成本')
|
4005
3998
|
sdq.jdjzt(db_name='聚合数据', table_name='京东_京准通')
|
4006
|
-
sdq.jdqzyx(db_name='聚合数据', table_name='京东_京准通_全站营销')
|
4007
3999
|
sdq.sku_sales(db_name='聚合数据', table_name='京东_sku_商品明细')
|
4000
|
+
|
4001
|
+
sdq._ald_wxt(db_name='聚合数据', table_name='奥莱店_主体报表')
|
4002
|
+
sdq._sj_wxt(db_name='聚合数据', table_name='圣积天猫店_主体报表')
|
4003
|
+
sdq._tb_wxt(db_name='聚合数据', table_name='淘宝_主体报表')
|
4004
|
+
sdq.jdqzyx(db_name='聚合数据', table_name='京东_京准通_全站营销')
|
4008
4005
|
sdq.spu_sales(db_name='聚合数据', table_name='京东_spu_商品明细')
|
4009
|
-
sdq.tg_cjzb(db_name='聚合数据', table_name='天猫_超级直播')
|
4010
|
-
sdq.pxb_zh(db_name='聚合数据', table_name='天猫_品销宝账户报表')
|
4011
4006
|
sdq.zb_ccfx(db_name='聚合数据', table_name='生意参谋_直播场次分析')
|
4012
4007
|
sdq.tg_by_day(db_name='聚合数据', table_name='多店推广场景_按日聚合')
|
4013
|
-
sdq.performance(bb_tg=True, db_name='聚合数据', table_name='_全店商品销售') # _全店商品销售
|
4014
|
-
sdq.performance(bb_tg=False, db_name='聚合数据', table_name='_推广商品销售') # _推广商品销售
|
4015
|
-
sdq.performance_jd(jd_tg=False, db_name='聚合数据', table_name='_京东_推广商品销售') # _推广商品销售
|
4008
|
+
# sdq.performance(bb_tg=True, db_name='聚合数据', table_name='_全店商品销售') # _全店商品销售
|
4009
|
+
# sdq.performance(bb_tg=False, db_name='聚合数据', table_name='_推广商品销售') # _推广商品销售
|
4010
|
+
# sdq.performance_jd(jd_tg=False, db_name='聚合数据', table_name='_京东_推广商品销售') # _推广商品销售
|
4016
4011
|
sdq.performance_concat(bb_tg=False, db_name='聚合数据', table_name='天猫_推广汇总') # _推广商品销售
|
4017
4012
|
|
4018
4013
|
|
mdbq/mysql/mysql.py
CHANGED
@@ -246,32 +246,6 @@ class MysqlUpload:
|
|
246
246
|
|
247
247
|
if reset_id:
|
248
248
|
pass
|
249
|
-
# # 6. 重置自增列
|
250
|
-
# try:
|
251
|
-
# # 查询所有复合主键
|
252
|
-
# sql = (
|
253
|
-
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
254
|
-
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
255
|
-
# )
|
256
|
-
# cursor.execute(sql)
|
257
|
-
# result = cursor.fetchall() # 复合主键数
|
258
|
-
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
259
|
-
# column_name = 'id'
|
260
|
-
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
261
|
-
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
262
|
-
# cursor.execute(sql, (db_name, table_name, column_name))
|
263
|
-
# result = cursor.fetchone()
|
264
|
-
# if result:
|
265
|
-
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
266
|
-
# cursor.execute(sql)
|
267
|
-
# cursor.execute(
|
268
|
-
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
269
|
-
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
270
|
-
# else:
|
271
|
-
# logger.info(f'{table_name} 存在复合主键: 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
272
|
-
# except Exception as e:
|
273
|
-
# logger.error(f'333 {table_name} {e}')
|
274
|
-
# connection.rollback()
|
275
249
|
connection.commit()
|
276
250
|
|
277
251
|
@try_except
|
@@ -380,23 +354,6 @@ class MysqlUpload:
|
|
380
354
|
else:
|
381
355
|
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
|
382
356
|
cursor.execute(sql)
|
383
|
-
# if col in main_key or col in unique_main_key:
|
384
|
-
# sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
|
385
|
-
# cursor.execute(sql, (col))
|
386
|
-
# result = cursor.fetchone() # 检查索引是否存在
|
387
|
-
# if not result:
|
388
|
-
# if col in main_key:
|
389
|
-
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
390
|
-
# logger.info(f"设置为索引: {col}({dtypes[col]})")
|
391
|
-
# cursor.execute(sql)
|
392
|
-
# elif col in unique_main_key:
|
393
|
-
# if dtypes[col] == 'mediumtext':
|
394
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
|
395
|
-
# else:
|
396
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
|
397
|
-
# logger.info(f"设置唯一索引: {col}({dtypes[col]})")
|
398
|
-
# logger.info(sql)
|
399
|
-
# cursor.execute(sql)
|
400
357
|
connection.commit() # 提交事务
|
401
358
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
402
359
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
@@ -455,11 +412,23 @@ class MysqlUpload:
|
|
455
412
|
# logger.info(sql)
|
456
413
|
cursor.execute(sql)
|
457
414
|
else: # 没有数据返回,则直接插入数据
|
458
|
-
cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
|
459
|
-
# data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
|
460
|
-
values = ', '.join([f'"{item}"' for item in dict_data.values()]) # 值要加引号
|
461
|
-
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
|
462
|
-
cursor.execute(sql)
|
415
|
+
# cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
|
416
|
+
# # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
|
417
|
+
# values = ', '.join([f'"{item}"' for item in dict_data.values()]) # 值要加引号
|
418
|
+
# sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
|
419
|
+
# cursor.execute(sql)
|
420
|
+
|
421
|
+
# 清理和验证列名
|
422
|
+
safe_columns = [f"`{escape_string(str(col))}`" for col in dict_data.keys()]
|
423
|
+
cols = ", ".join(safe_columns)
|
424
|
+
# 使用参数化查询
|
425
|
+
placeholders = ", ".join(["%s"] * len(dict_data))
|
426
|
+
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders})"
|
427
|
+
# 转义值并作为参数传递
|
428
|
+
escaped_values = [escape_string(str(v)) if isinstance(v, str) else v for v in
|
429
|
+
dict_data.values()]
|
430
|
+
cursor.execute(sql, tuple(escaped_values))
|
431
|
+
|
463
432
|
connection.commit() # 提交数据库
|
464
433
|
continue
|
465
434
|
|
@@ -585,23 +554,6 @@ class MysqlUpload:
|
|
585
554
|
else:
|
586
555
|
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
|
587
556
|
cursor.execute(sql)
|
588
|
-
# if col in main_key or col in unique_main_key:
|
589
|
-
# sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
|
590
|
-
# cursor.execute(sql, (col))
|
591
|
-
# result = cursor.fetchone() # 检查索引是否存在
|
592
|
-
# if not result:
|
593
|
-
# if col in main_key:
|
594
|
-
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
595
|
-
# logger.info(f"设置为索引: {col}({dtypes[col]})")
|
596
|
-
# cursor.execute(sql)
|
597
|
-
# elif col in unique_main_key:
|
598
|
-
# if dtypes[col] == 'mediumtext':
|
599
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
|
600
|
-
# else:
|
601
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
|
602
|
-
# logger.info(f"设置唯一索引: {col}({dtypes[col]})")
|
603
|
-
# logger.info(sql)
|
604
|
-
# cursor.execute(sql)
|
605
557
|
connection.commit() # 提交事务
|
606
558
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
607
559
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
@@ -657,11 +609,22 @@ class MysqlUpload:
|
|
657
609
|
# logger.info(sql)
|
658
610
|
cursor.execute(sql)
|
659
611
|
else: # 没有数据返回,则直接插入数据
|
660
|
-
cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
|
661
|
-
# data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
|
662
|
-
values = ', '.join([f'"{item}"' for item in dict_data.values()]) # 值要加引号
|
663
|
-
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
|
664
|
-
cursor.execute(sql)
|
612
|
+
# cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
|
613
|
+
# # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
|
614
|
+
# values = ', '.join([f'"{item}"' for item in dict_data.values()]) # 值要加引号
|
615
|
+
# sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
|
616
|
+
# cursor.execute(sql)
|
617
|
+
|
618
|
+
# 构建安全的INSERT语句
|
619
|
+
safe_keys = [f"`{escape_string(str(k))}`" for k in dict_data.keys()]
|
620
|
+
keys_data = ", ".join(safe_keys)
|
621
|
+
placeholders = ", ".join(["%s"] * len(dict_data))
|
622
|
+
|
623
|
+
# 使用参数化查询
|
624
|
+
sql = f"INSERT INTO `{table_name}` ({keys_data}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_datas}"
|
625
|
+
escaped_values = [escape_string(str(v)) if isinstance(v, str) else v for v in dict_data.values()]
|
626
|
+
cursor.execute(sql, tuple(escaped_values))
|
627
|
+
|
665
628
|
connection.commit() # 提交数据库
|
666
629
|
connection.close()
|
667
630
|
return
|
@@ -703,9 +666,8 @@ class MysqlUpload:
|
|
703
666
|
if str(v) == '':
|
704
667
|
v = 0
|
705
668
|
v = str(v)
|
706
|
-
# v = re.sub('^-$|^--$|^nan$|^null$', '0', v, re.I)
|
707
|
-
# v = re.sub(',|="|"', '', v, re.I)
|
708
669
|
v = re.sub('^="|"$', '', v, re.I)
|
670
|
+
v = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', str(v)) # 移除控制字符
|
709
671
|
if re.findall(r'^[-+]?\d+\.?\d*%$', v):
|
710
672
|
v = str(float(v.rstrip("%")) / 100)
|
711
673
|
|
@@ -919,35 +881,6 @@ class MysqlUpload:
|
|
919
881
|
)
|
920
882
|
if reset_id:
|
921
883
|
pass
|
922
|
-
# # 6. 重置自增列
|
923
|
-
# try:
|
924
|
-
# # 查询所有复合主键
|
925
|
-
# sql = (
|
926
|
-
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
927
|
-
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
928
|
-
# )
|
929
|
-
# cursor.execute(sql)
|
930
|
-
# result = cursor.fetchall() # 复合主键数
|
931
|
-
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
932
|
-
# column_name = 'id'
|
933
|
-
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
934
|
-
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
935
|
-
# # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
936
|
-
# cursor.execute(sql, (db_name, table_name, column_name))
|
937
|
-
# result = cursor.fetchone()
|
938
|
-
# if result:
|
939
|
-
# # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
940
|
-
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
941
|
-
# cursor.execute(sql)
|
942
|
-
# cursor.execute(
|
943
|
-
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
944
|
-
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
945
|
-
# # logger.info(f'重置自增id')
|
946
|
-
# else:
|
947
|
-
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
948
|
-
# except Exception as e:
|
949
|
-
# logger.error(f'333 {table_name} {e}')
|
950
|
-
# connection.rollback()
|
951
884
|
connection.commit() # 提交事务
|
952
885
|
connection.close()
|
953
886
|
return
|
@@ -975,36 +908,6 @@ class MysqlUpload:
|
|
975
908
|
index=False,
|
976
909
|
chunksize=1000
|
977
910
|
)
|
978
|
-
# # 6. 重置自增列
|
979
|
-
# try:
|
980
|
-
# # 查询所有复合主键
|
981
|
-
# sql = (
|
982
|
-
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
983
|
-
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
984
|
-
# )
|
985
|
-
# cursor.execute(sql)
|
986
|
-
# result = cursor.fetchall() # 复合主键数
|
987
|
-
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
988
|
-
# column_name = 'id'
|
989
|
-
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
990
|
-
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
991
|
-
# # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
992
|
-
# cursor.execute(sql, (db_name, table_name, column_name))
|
993
|
-
# result = cursor.fetchone()
|
994
|
-
# if result:
|
995
|
-
# # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
996
|
-
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
997
|
-
# cursor.execute(sql)
|
998
|
-
# cursor.execute(
|
999
|
-
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1000
|
-
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1001
|
-
# # logger.info(f'重置自增id')
|
1002
|
-
# else:
|
1003
|
-
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1004
|
-
# except Exception as e:
|
1005
|
-
# logger.error(f'333 {table_name} {e}')
|
1006
|
-
# connection.rollback()
|
1007
|
-
# connection.close()
|
1008
911
|
return
|
1009
912
|
|
1010
913
|
datas = df.to_dict(orient='records')
|
@@ -1095,35 +998,6 @@ class MysqlUpload:
|
|
1095
998
|
|
1096
999
|
if reset_id:
|
1097
1000
|
pass
|
1098
|
-
# # 6. 重置自增列
|
1099
|
-
# try:
|
1100
|
-
# # 查询所有复合主键
|
1101
|
-
# sql = (
|
1102
|
-
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
1103
|
-
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
1104
|
-
# )
|
1105
|
-
# cursor.execute(sql)
|
1106
|
-
# result = cursor.fetchall() # 复合主键数
|
1107
|
-
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
1108
|
-
# column_name = 'id'
|
1109
|
-
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
1110
|
-
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
1111
|
-
# # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
1112
|
-
# cursor.execute(sql, (db_name, table_name, column_name))
|
1113
|
-
# result = cursor.fetchone()
|
1114
|
-
# if result:
|
1115
|
-
# # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
1116
|
-
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
1117
|
-
# cursor.execute(sql)
|
1118
|
-
# cursor.execute(
|
1119
|
-
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1120
|
-
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1121
|
-
# # logger.info(f'重置自增id')
|
1122
|
-
# else:
|
1123
|
-
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1124
|
-
# except Exception as e:
|
1125
|
-
# logger.error(f'333 {table_name} {e}')
|
1126
|
-
# connection.rollback()
|
1127
1001
|
connection.commit() # 提交事务
|
1128
1002
|
connection.close()
|
1129
1003
|
|
@@ -1405,33 +1279,6 @@ class OptimizeDatas:
|
|
1405
1279
|
self.end_date = end_date_before
|
1406
1280
|
else: # 不存在日期列的情况
|
1407
1281
|
self.delete_duplicate2(table_name=table_name, except_key=except_key)
|
1408
|
-
|
1409
|
-
# # 6. 重置自增列
|
1410
|
-
# try:
|
1411
|
-
# # 查询所有复合主键
|
1412
|
-
# sql = (
|
1413
|
-
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
1414
|
-
# f"WHERE `TABLE_SCHEMA` = '{self.db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
1415
|
-
# )
|
1416
|
-
# cursor.execute(sql)
|
1417
|
-
# result = cursor.fetchall() # 复合主键数
|
1418
|
-
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
1419
|
-
# column_name = 'id'
|
1420
|
-
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
1421
|
-
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
1422
|
-
# cursor.execute(sql, (self.db_name, table_name, column_name))
|
1423
|
-
# result = cursor.fetchone()
|
1424
|
-
# if result:
|
1425
|
-
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
1426
|
-
# cursor.execute(sql)
|
1427
|
-
# cursor.execute(
|
1428
|
-
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1429
|
-
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1430
|
-
# else:
|
1431
|
-
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1432
|
-
# except Exception as e:
|
1433
|
-
# logger.error(f'333 {table_name} {e}')
|
1434
|
-
# self.connection.rollback()
|
1435
1282
|
self.connection.close()
|
1436
1283
|
logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
|
1437
1284
|
|
mdbq/spider/aikucun.py
CHANGED
@@ -262,7 +262,7 @@ class AikuCun:
|
|
262
262
|
print(f'正在获取数据({num}/{len(date_list)}): {item_type}榜单 {date}')
|
263
263
|
# print(res.json())
|
264
264
|
if not res.json()['success']:
|
265
|
-
print('没有获取到数据,
|
265
|
+
print('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
|
266
266
|
num += 1
|
267
267
|
self.error_count += 1
|
268
268
|
time.sleep(1)
|
@@ -484,6 +484,6 @@ def main(start_date, end_date, item_type=['spu']):
|
|
484
484
|
if __name__ == '__main__':
|
485
485
|
main(
|
486
486
|
start_date='2025-03-01',
|
487
|
-
end_date='2025-03-
|
487
|
+
# end_date='2025-03-26', # 不传则默认到今天
|
488
488
|
item_type=['spu', 'sku']
|
489
489
|
)
|
@@ -1,13 +1,13 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=cmqMpHGJCaMGsHUAVyC67-oQIJBBnfayuPIXO2haNMo,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/query_data.py,sha256
|
4
|
+
mdbq/aggregation/query_data.py,sha256=9doVoOBEaLyHlT0fVEXpWtwLLV4NhpQs17kHQQ0p3ys,185443
|
5
5
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
6
6
|
mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
|
7
7
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
8
8
|
mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
|
9
9
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
10
|
-
mdbq/mysql/mysql.py,sha256=
|
10
|
+
mdbq/mysql/mysql.py,sha256=xCWY-np41FE1yrQqVWs0AzKZK9DHDFlZ_gbnog5LaNY,77860
|
11
11
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
12
12
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
13
13
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
@@ -20,8 +20,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
20
20
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
21
21
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
22
22
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
23
|
-
mdbq/spider/aikucun.py,sha256=
|
24
|
-
mdbq-3.8.
|
25
|
-
mdbq-3.8.
|
26
|
-
mdbq-3.8.
|
27
|
-
mdbq-3.8.
|
23
|
+
mdbq/spider/aikucun.py,sha256=m7ZIvrc9pqoGCYEH3FtgKTwqhX7QB6qzgc2twDzhX4w,19962
|
24
|
+
mdbq-3.8.13.dist-info/METADATA,sha256=HEw9yfBRwMt6PWbT5Mr1O6pwyYaZVKCT0Yf6qt8hrzU,364
|
25
|
+
mdbq-3.8.13.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
26
|
+
mdbq-3.8.13.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
27
|
+
mdbq-3.8.13.dist-info/RECORD,,
|
File without changes
|
File without changes
|