mdbq 4.0.44__py3-none-any.whl → 4.0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +7 -4
- mdbq/mysql/uploader.py +36 -2
- {mdbq-4.0.44.dist-info → mdbq-4.0.46.dist-info}/METADATA +1 -1
- {mdbq-4.0.44.dist-info → mdbq-4.0.46.dist-info}/RECORD +7 -7
- {mdbq-4.0.44.dist-info → mdbq-4.0.46.dist-info}/WHEEL +0 -0
- {mdbq-4.0.44.dist-info → mdbq-4.0.46.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.44'
|
1
|
+
VERSION = '4.0.46'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -1514,25 +1514,29 @@ class MysqlDatasQuery:
|
|
1514
1514
|
end_date=end_date,
|
1515
1515
|
projection=projection,
|
1516
1516
|
)
|
1517
|
+
df.pop('data_sku')
|
1517
1518
|
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
|
1518
1519
|
df_set['商品id'] = df_set['商品id'].astype('int64')
|
1519
1520
|
df['商品id'] = df['商品id'].astype('int64')
|
1520
1521
|
df_set.sort_values('商品id', ascending=False, ignore_index=True, inplace=True)
|
1521
1522
|
|
1523
|
+
# 仅保留最新日期的数据
|
1524
|
+
idx = df.groupby(['商品id'])['更新时间'].idxmax()
|
1525
|
+
df = df.loc[idx]
|
1526
|
+
|
1522
1527
|
def check_year(item_id):
|
1523
1528
|
for item in df_set.to_dict(orient='records'):
|
1524
1529
|
if item_id > item['商品id']:
|
1525
1530
|
return item['上市年份']
|
1526
1531
|
|
1527
1532
|
df['上市年份'] = df['商品id'].apply(lambda x: check_year(x))
|
1528
|
-
p = df.pop('上市年份')
|
1529
|
-
df.insert(loc=5, column='上市年份', value=p)
|
1530
1533
|
set_typ = {
|
1531
1534
|
'日期': 'date',
|
1532
1535
|
'店铺id': 'bigint',
|
1533
1536
|
'店铺名称': 'varchar(255)',
|
1534
1537
|
'商家id': 'bigint',
|
1535
1538
|
'商品id': 'bigint',
|
1539
|
+
'上市年份': 'varchar(50)',
|
1536
1540
|
'商品标题': 'varchar(255)',
|
1537
1541
|
'商品链接': 'varchar(255)',
|
1538
1542
|
'商品图片': 'varchar(255)',
|
@@ -1540,7 +1544,6 @@ class MysqlDatasQuery:
|
|
1540
1544
|
'页面价': 'int',
|
1541
1545
|
'data_sku': 'varchar(1000)',
|
1542
1546
|
'更新时间': 'timestamp',
|
1543
|
-
'上市年份': 'varchar(50)',
|
1544
1547
|
}
|
1545
1548
|
return df, {
|
1546
1549
|
'db_name': db_name,
|
@@ -2267,7 +2270,7 @@ class MysqlDatasQuery:
|
|
2267
2270
|
'日期': 'date',
|
2268
2271
|
'店铺名称': 'varchar(100)',
|
2269
2272
|
'词类型': 'varchar(100)',
|
2270
|
-
'搜索词': 'varchar(
|
2273
|
+
'搜索词': 'varchar(255)',
|
2271
2274
|
'访客数': 'int',
|
2272
2275
|
'加购人数': 'int',
|
2273
2276
|
'支付金额': 'decimal(10,2)',
|
mdbq/mysql/uploader.py
CHANGED
@@ -625,11 +625,45 @@ class MySQLUploader:
|
|
625
625
|
根据列类型验证并转换数据值
|
626
626
|
"""
|
627
627
|
column_type_lower = column_type.lower() if column_type else ''
|
628
|
-
|
628
|
+
|
629
|
+
# JSON类型验证和转换(优先处理,避免pd.isna的问题)
|
630
|
+
if 'json' in column_type_lower:
|
631
|
+
if isinstance(value, (dict, list)):
|
632
|
+
try:
|
633
|
+
import json
|
634
|
+
return json.dumps(value, ensure_ascii=False)
|
635
|
+
except (TypeError, ValueError) as e:
|
636
|
+
logger.error(f"JSON序列化失败: {e}", {"库": db_name, "表": table_name, "列": col_name, "值": value})
|
637
|
+
raise ValueError(f"JSON序列化失败: {e}")
|
638
|
+
elif isinstance(value, str):
|
639
|
+
# 验证字符串是否为有效的JSON
|
640
|
+
try:
|
641
|
+
import json
|
642
|
+
json.loads(value)
|
643
|
+
return value
|
644
|
+
except (TypeError, ValueError) as e:
|
645
|
+
logger.error(f"无效的JSON字符串: {e}", {"库": db_name, "表": table_name, "列": col_name, "值": value})
|
646
|
+
raise ValueError(f"无效的JSON字符串: {e}")
|
647
|
+
else:
|
648
|
+
# 其他类型转换为字符串
|
649
|
+
return str(value)
|
650
|
+
|
651
|
+
# 统一判断None/NaN(排除列表和字典类型)
|
629
652
|
if value == '':
|
630
653
|
if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
|
631
654
|
return ""
|
632
|
-
|
655
|
+
|
656
|
+
# 安全地检查NaN值,避免对列表和字典使用pd.isna
|
657
|
+
is_nan = False
|
658
|
+
if isinstance(value, (list, dict)):
|
659
|
+
is_nan = False # 列表和字典不是NaN
|
660
|
+
else:
|
661
|
+
try:
|
662
|
+
is_nan = pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value))
|
663
|
+
except (ValueError, TypeError):
|
664
|
+
is_nan = False
|
665
|
+
|
666
|
+
if value == '' or is_nan:
|
633
667
|
# 兜底填充值映射
|
634
668
|
fallback_map = {
|
635
669
|
'int': 0,
|
{mdbq-4.0.44.dist-info → mdbq-4.0.46.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=_sPwKl25zozCOVLAfNKDLrJOiNvhHbBB2Oh22FfZsGk,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/query_data.py,sha256=
|
4
|
+
mdbq/aggregation/query_data.py,sha256=gQUcdrK0QCA0nEBkRA9zBieRWqSnkvpV5Eb-hXKw9Y8,170094
|
5
5
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
6
6
|
mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
|
7
7
|
mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
|
|
11
11
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
12
12
|
mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
|
13
13
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=NKwuBZcm4597_3gnY9M5m-jnD5joaVjf28twfQfP_jE,87955
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
17
17
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.
|
30
|
-
mdbq-4.0.
|
31
|
-
mdbq-4.0.
|
28
|
+
mdbq-4.0.46.dist-info/METADATA,sha256=gwdYseVCsqG4wlrM-Upti194tEpmc1PB7TKshTGKI30,364
|
29
|
+
mdbq-4.0.46.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.46.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.46.dist-info/RECORD,,
|
File without changes
|
File without changes
|