mdbq 4.0.44__py3-none-any.whl → 4.0.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.44'
1
+ VERSION = '4.0.46'
@@ -1514,25 +1514,29 @@ class MysqlDatasQuery:
1514
1514
  end_date=end_date,
1515
1515
  projection=projection,
1516
1516
  )
1517
+ df.pop('data_sku')
1517
1518
  df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
1518
1519
  df_set['商品id'] = df_set['商品id'].astype('int64')
1519
1520
  df['商品id'] = df['商品id'].astype('int64')
1520
1521
  df_set.sort_values('商品id', ascending=False, ignore_index=True, inplace=True)
1521
1522
 
1523
+ # 仅保留最新日期的数据
1524
+ idx = df.groupby(['商品id'])['更新时间'].idxmax()
1525
+ df = df.loc[idx]
1526
+
1522
1527
  def check_year(item_id):
1523
1528
  for item in df_set.to_dict(orient='records'):
1524
1529
  if item_id > item['商品id']:
1525
1530
  return item['上市年份']
1526
1531
 
1527
1532
  df['上市年份'] = df['商品id'].apply(lambda x: check_year(x))
1528
- p = df.pop('上市年份')
1529
- df.insert(loc=5, column='上市年份', value=p)
1530
1533
  set_typ = {
1531
1534
  '日期': 'date',
1532
1535
  '店铺id': 'bigint',
1533
1536
  '店铺名称': 'varchar(255)',
1534
1537
  '商家id': 'bigint',
1535
1538
  '商品id': 'bigint',
1539
+ '上市年份': 'varchar(50)',
1536
1540
  '商品标题': 'varchar(255)',
1537
1541
  '商品链接': 'varchar(255)',
1538
1542
  '商品图片': 'varchar(255)',
@@ -1540,7 +1544,6 @@ class MysqlDatasQuery:
1540
1544
  '页面价': 'int',
1541
1545
  'data_sku': 'varchar(1000)',
1542
1546
  '更新时间': 'timestamp',
1543
- '上市年份': 'varchar(50)',
1544
1547
  }
1545
1548
  return df, {
1546
1549
  'db_name': db_name,
@@ -2267,7 +2270,7 @@ class MysqlDatasQuery:
2267
2270
  '日期': 'date',
2268
2271
  '店铺名称': 'varchar(100)',
2269
2272
  '词类型': 'varchar(100)',
2270
- '搜索词': 'varchar(100)',
2273
+ '搜索词': 'varchar(255)',
2271
2274
  '访客数': 'int',
2272
2275
  '加购人数': 'int',
2273
2276
  '支付金额': 'decimal(10,2)',
mdbq/mysql/uploader.py CHANGED
@@ -625,11 +625,45 @@ class MySQLUploader:
625
625
  根据列类型验证并转换数据值
626
626
  """
627
627
  column_type_lower = column_type.lower() if column_type else ''
628
- # 统一判断None/NaN
628
+
629
+ # JSON类型验证和转换(优先处理,避免pd.isna的问题)
630
+ if 'json' in column_type_lower:
631
+ if isinstance(value, (dict, list)):
632
+ try:
633
+ import json
634
+ return json.dumps(value, ensure_ascii=False)
635
+ except (TypeError, ValueError) as e:
636
+ logger.error(f"JSON序列化失败: {e}", {"库": db_name, "表": table_name, "列": col_name, "值": value})
637
+ raise ValueError(f"JSON序列化失败: {e}")
638
+ elif isinstance(value, str):
639
+ # 验证字符串是否为有效的JSON
640
+ try:
641
+ import json
642
+ json.loads(value)
643
+ return value
644
+ except (TypeError, ValueError) as e:
645
+ logger.error(f"无效的JSON字符串: {e}", {"库": db_name, "表": table_name, "列": col_name, "值": value})
646
+ raise ValueError(f"无效的JSON字符串: {e}")
647
+ else:
648
+ # 其他类型转换为字符串
649
+ return str(value)
650
+
651
+ # 统一判断None/NaN(排除列表和字典类型)
629
652
  if value == '':
630
653
  if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
631
654
  return ""
632
- if value == '' or pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value)):
655
+
656
+ # 安全地检查NaN值,避免对列表和字典使用pd.isna
657
+ is_nan = False
658
+ if isinstance(value, (list, dict)):
659
+ is_nan = False # 列表和字典不是NaN
660
+ else:
661
+ try:
662
+ is_nan = pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value))
663
+ except (ValueError, TypeError):
664
+ is_nan = False
665
+
666
+ if value == '' or is_nan:
633
667
  # 兜底填充值映射
634
668
  fallback_map = {
635
669
  'int': 0,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.44
3
+ Version: 4.0.46
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,7 +1,7 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=Yq_JgKwKONwVexEyE66trDbripXgbesAkvt1eQ___20,18
2
+ mdbq/__version__.py,sha256=_sPwKl25zozCOVLAfNKDLrJOiNvhHbBB2Oh22FfZsGk,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/query_data.py,sha256=ZWLJghNiEtyA4rvgUqMCLorY0R4-Likd6i4mVMuOni0,170025
4
+ mdbq/aggregation/query_data.py,sha256=gQUcdrK0QCA0nEBkRA9zBieRWqSnkvpV5Eb-hXKw9Y8,170094
5
5
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
6
6
  mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
7
7
  mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
11
11
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
12
12
  mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
13
13
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
14
- mdbq/mysql/uploader.py,sha256=LPfYEj7ywoAynY2Nl9gg0IurgIWd_bTwkda2ifD1TeE,86443
14
+ mdbq/mysql/uploader.py,sha256=NKwuBZcm4597_3gnY9M5m-jnD5joaVjf28twfQfP_jE,87955
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
17
17
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
28
- mdbq-4.0.44.dist-info/METADATA,sha256=6BCrO5mef08KVeODcA3rgk-gOsNtI_8_CTh0mghCKeE,364
29
- mdbq-4.0.44.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-4.0.44.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-4.0.44.dist-info/RECORD,,
28
+ mdbq-4.0.46.dist-info/METADATA,sha256=gwdYseVCsqG4wlrM-Upti194tEpmc1PB7TKshTGKI30,364
29
+ mdbq-4.0.46.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-4.0.46.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-4.0.46.dist-info/RECORD,,
File without changes