mdbq 3.6.10__tar.gz → 3.6.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {mdbq-3.6.10 → mdbq-3.6.11}/PKG-INFO +1 -1
  2. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/redis/getredis.py +105 -13
  3. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq.egg-info/PKG-INFO +1 -1
  4. {mdbq-3.6.10 → mdbq-3.6.11}/setup.py +1 -1
  5. {mdbq-3.6.10 → mdbq-3.6.11}/README.txt +0 -0
  6. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/__init__.py +0 -0
  7. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/__version__.py +0 -0
  8. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/aggregation/__init__.py +0 -0
  9. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/aggregation/aggregation.py +0 -0
  10. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/aggregation/datashow.py +0 -0
  11. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/aggregation/optimize_data.py +0 -0
  12. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/aggregation/query_data.py +0 -0
  13. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/bdup/__init__.py +0 -0
  14. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/bdup/bdup.py +0 -0
  15. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/config/__init__.py +0 -0
  16. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/config/myconfig.py +0 -0
  17. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/config/products.py +0 -0
  18. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/config/set_support.py +0 -0
  19. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/dataframe/__init__.py +0 -0
  20. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/dataframe/converter.py +0 -0
  21. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/log/__init__.py +0 -0
  22. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/log/mylogger.py +0 -0
  23. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mongo/__init__.py +0 -0
  24. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mongo/mongo.py +0 -0
  25. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mysql/__init__.py +0 -0
  26. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mysql/mysql.py +0 -0
  27. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mysql/mysql_bak.py +0 -0
  28. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mysql/recheck_mysql.py +0 -0
  29. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mysql/s_query.py +0 -0
  30. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/mysql/year_month_day.py +0 -0
  31. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/other/__init__.py +0 -0
  32. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/other/download_sku_picture.py +0 -0
  33. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/other/porxy.py +0 -0
  34. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/other/pov_city.py +0 -0
  35. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/other/sku_picture.py +0 -0
  36. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/other/ua_sj.py +0 -0
  37. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/pbix/__init__.py +0 -0
  38. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/pbix/pbix_refresh.py +0 -0
  39. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/pbix/refresh_all.py +0 -0
  40. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/pbix/refresh_all_old.py +0 -0
  41. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/redis/__init__.py +0 -0
  42. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/redis/getredis_优化hash.py +0 -0
  43. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/spider/__init__.py +0 -0
  44. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq/spider/aikucun.py +0 -0
  45. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq.egg-info/SOURCES.txt +0 -0
  46. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq.egg-info/dependency_links.txt +0 -0
  47. {mdbq-3.6.10 → mdbq-3.6.11}/mdbq.egg-info/top_level.txt +0 -0
  48. {mdbq-3.6.10 → mdbq-3.6.11}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 3.6.10
3
+ Version: 3.6.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -343,7 +343,6 @@ class RedisDataHash(object):
343
343
  end_month = end_dt.to_period('M')
344
344
  months = pd.period_range(start_month, end_month, freq='M').strftime("%Y%m").tolist()
345
345
  cache_data = self._fetch_redis_data(cache_key, months)
346
-
347
346
  if cache_data.empty:
348
347
  self._trigger_async_cache_update(
349
348
  cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
@@ -386,10 +385,26 @@ class RedisDataHash(object):
386
385
  combined_data = self._merge_data(new_data, existing_data)
387
386
 
388
387
  if not combined_data.empty:
389
- if '日期' not in combined_data.columns.tolist():
390
- serialized_data = self._serialize_data(combined_data)
391
- self.redis_engine.hset(cache_key, "all", serialized_data)
392
- self.redis_engine.expire(cache_key, self.cache_ttl)
388
+ if '日期' not in combined_data.columns:
389
+ # 原子化删除旧分片
390
+ # 优化分片存储性能
391
+ chunk_size = 5000
392
+ with self.redis_engine.pipeline(transaction=False) as pipe:
393
+ # 批量删除旧分片
394
+ for key in self.redis_engine.hscan_iter(cache_key, match="all_*"):
395
+ pipe.hdel(cache_key, key[0])
396
+
397
+ # 批量写入新分片
398
+ for idx in range(0, len(combined_data), chunk_size):
399
+ chunk = combined_data.iloc[idx:idx + chunk_size]
400
+ chunk_key = f"all_{idx // chunk_size:04d}"
401
+ pipe.hset(cache_key, chunk_key, self._serialize_data(chunk))
402
+
403
+ pipe.expire(cache_key, self.cache_ttl)
404
+ pipe.execute()
405
+ # serialized_data = self._serialize_data(combined_data)
406
+ # self.redis_engine.hset(cache_key, "all", serialized_data)
407
+ # self.redis_engine.expire(cache_key, self.cache_ttl)
393
408
  else:
394
409
  # 按月分片存储
395
410
  combined_data['month'] = combined_data['日期'].dt.to_period('M').dt.strftime("%Y%m")
@@ -422,6 +437,69 @@ class RedisDataHash(object):
422
437
  return pd.DataFrame()
423
438
 
424
439
  def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
440
+ try:
441
+ dfs = []
442
+
443
+ if months is not None:
444
+ # 1. 获取指定月份数据
445
+ month_fields = months.copy()
446
+ month_data = self.redis_engine.hmget(cache_key, month_fields)
447
+
448
+ # 处理月份数据
449
+ for data, field in zip(month_data, month_fields):
450
+ if data:
451
+ try:
452
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
453
+ df = self._convert_date_columns(df)
454
+ dfs.append(df)
455
+ except Exception as e:
456
+ logger.error(f"月份数据解析失败 {field}: {e}")
457
+
458
+ # 2. 获取所有分片数据
459
+ # 优化分片数据获取
460
+ pipeline = self.redis_engine.pipeline()
461
+ cursor, keys = self.redis_engine.hscan(cache_key, match="all_*")
462
+ while True:
463
+ for key in keys:
464
+ pipeline.hget(cache_key, key)
465
+ if cursor == 0:
466
+ break
467
+ cursor, keys = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
468
+ shard_values = pipeline.execute()
469
+
470
+ # 处理分片数据
471
+ for value in shard_values:
472
+ if value:
473
+ try:
474
+ df = pd.DataFrame(json.loads(value.decode("utf-8")))
475
+ dfs.append(self._convert_date_columns(df))
476
+ except Exception as e:
477
+ logger.error(f"分片数据解析失败: {e}")
478
+
479
+ else:
480
+ # 原有全量获取逻辑保持不变
481
+ data_dict = self.redis_engine.hgetall(cache_key)
482
+ for field, data in data_dict.items():
483
+ try:
484
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
485
+ df = self._convert_date_columns(df)
486
+ dfs.append(df)
487
+ except Exception as e:
488
+ logger.error(f"Redis 数据解析失败 {field.decode()}: {e}")
489
+
490
+ # 统一合并和排序处理
491
+ if dfs:
492
+ final_df = pd.concat(dfs, ignore_index=True)
493
+ if '日期' in final_df.columns:
494
+ final_df = final_df.sort_values('日期', ascending=False)
495
+ return final_df
496
+ return pd.DataFrame()
497
+
498
+ except Exception as e:
499
+ logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
500
+ return pd.DataFrame()
501
+
502
+ def _fetch_redis_data_bak(self, cache_key: str, months: list = None) -> pd.DataFrame:
425
503
  try:
426
504
  if months is not None:
427
505
  fields = months.copy()
@@ -435,15 +513,29 @@ class RedisDataHash(object):
435
513
  dfs.append(df)
436
514
  return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
437
515
  else:
438
- data_dict = self.redis_engine.hgetall(cache_key)
516
+ # 优化分片数据获取
517
+ cursor, data = self.redis_engine.hscan(cache_key, match="all_*")
439
518
  dfs = []
440
- for field, data in data_dict.items():
441
- try:
442
- df = pd.DataFrame(json.loads(data.decode("utf-8")))
443
- df = self._convert_date_columns(df)
444
- dfs.append(df)
445
- except Exception as e:
446
- logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
519
+ while True:
520
+ for field, value in data.items():
521
+ try:
522
+ df = pd.DataFrame(json.loads(value))
523
+ dfs.append(self._convert_date_columns(df))
524
+ except Exception as e:
525
+ logger.error(f"分片解析失败 {field}: {e}")
526
+ if cursor == 0:
527
+ break
528
+ cursor, data = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
529
+ return pd.concat(dfs) if dfs else pd.DataFrame()
530
+ # data_dict = self.redis_engine.hgetall(cache_key)
531
+ # dfs = []
532
+ # for field, data in data_dict.items():
533
+ # try:
534
+ # df = pd.DataFrame(json.loads(data.decode("utf-8")))
535
+ # df = self._convert_date_columns(df)
536
+ # dfs.append(df)
537
+ # except Exception as e:
538
+ # logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
447
539
  return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
448
540
  except Exception as e:
449
541
  logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 3.6.10
3
+ Version: 3.6.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -3,7 +3,7 @@
3
3
  from setuptools import setup, find_packages
4
4
 
5
5
  setup(name='mdbq',
6
- version='3.6.10',
6
+ version='3.6.11',
7
7
  author='xigua, ',
8
8
  author_email="2587125111@qq.com",
9
9
  url='https://pypi.org/project/mdbq',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes