mdbq 3.6.10__py3-none-any.whl → 3.6.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/redis/getredis.py CHANGED
@@ -343,7 +343,6 @@ class RedisDataHash(object):
343
343
  end_month = end_dt.to_period('M')
344
344
  months = pd.period_range(start_month, end_month, freq='M').strftime("%Y%m").tolist()
345
345
  cache_data = self._fetch_redis_data(cache_key, months)
346
-
347
346
  if cache_data.empty:
348
347
  self._trigger_async_cache_update(
349
348
  cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
@@ -386,10 +385,26 @@ class RedisDataHash(object):
386
385
  combined_data = self._merge_data(new_data, existing_data)
387
386
 
388
387
  if not combined_data.empty:
389
- if '日期' not in combined_data.columns.tolist():
390
- serialized_data = self._serialize_data(combined_data)
391
- self.redis_engine.hset(cache_key, "all", serialized_data)
392
- self.redis_engine.expire(cache_key, self.cache_ttl)
388
+ if '日期' not in combined_data.columns:
389
+ # 原子化删除旧分片
390
+ # 优化分片存储性能
391
+ chunk_size = 5000
392
+ with self.redis_engine.pipeline(transaction=False) as pipe:
393
+ # 批量删除旧分片
394
+ for key in self.redis_engine.hscan_iter(cache_key, match="all_*"):
395
+ pipe.hdel(cache_key, key[0])
396
+
397
+ # 批量写入新分片
398
+ for idx in range(0, len(combined_data), chunk_size):
399
+ chunk = combined_data.iloc[idx:idx + chunk_size]
400
+ chunk_key = f"all_{idx // chunk_size:04d}"
401
+ pipe.hset(cache_key, chunk_key, self._serialize_data(chunk))
402
+
403
+ pipe.expire(cache_key, self.cache_ttl)
404
+ pipe.execute()
405
+ # serialized_data = self._serialize_data(combined_data)
406
+ # self.redis_engine.hset(cache_key, "all", serialized_data)
407
+ # self.redis_engine.expire(cache_key, self.cache_ttl)
393
408
  else:
394
409
  # 按月分片存储
395
410
  combined_data['month'] = combined_data['日期'].dt.to_period('M').dt.strftime("%Y%m")
@@ -422,6 +437,69 @@ class RedisDataHash(object):
422
437
  return pd.DataFrame()
423
438
 
424
439
  def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
440
+ try:
441
+ dfs = []
442
+
443
+ if months is not None:
444
+ # 1. 获取指定月份数据
445
+ month_fields = months.copy()
446
+ month_data = self.redis_engine.hmget(cache_key, month_fields)
447
+
448
+ # 处理月份数据
449
+ for data, field in zip(month_data, month_fields):
450
+ if data:
451
+ try:
452
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
453
+ df = self._convert_date_columns(df)
454
+ dfs.append(df)
455
+ except Exception as e:
456
+ logger.error(f"月份数据解析失败 {field}: {e}")
457
+
458
+ # 2. 获取所有分片数据
459
+ # 优化分片数据获取
460
+ pipeline = self.redis_engine.pipeline()
461
+ cursor, keys = self.redis_engine.hscan(cache_key, match="all_*")
462
+ while True:
463
+ for key in keys:
464
+ pipeline.hget(cache_key, key)
465
+ if cursor == 0:
466
+ break
467
+ cursor, keys = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
468
+ shard_values = pipeline.execute()
469
+
470
+ # 处理分片数据
471
+ for value in shard_values:
472
+ if value:
473
+ try:
474
+ df = pd.DataFrame(json.loads(value.decode("utf-8")))
475
+ dfs.append(self._convert_date_columns(df))
476
+ except Exception as e:
477
+ logger.error(f"分片数据解析失败: {e}")
478
+
479
+ else:
480
+ # 原有全量获取逻辑保持不变
481
+ data_dict = self.redis_engine.hgetall(cache_key)
482
+ for field, data in data_dict.items():
483
+ try:
484
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
485
+ df = self._convert_date_columns(df)
486
+ dfs.append(df)
487
+ except Exception as e:
488
+ logger.error(f"Redis 数据解析失败 {field.decode()}: {e}")
489
+
490
+ # 统一合并和排序处理
491
+ if dfs:
492
+ final_df = pd.concat(dfs, ignore_index=True)
493
+ if '日期' in final_df.columns:
494
+ final_df = final_df.sort_values('日期', ascending=False)
495
+ return final_df
496
+ return pd.DataFrame()
497
+
498
+ except Exception as e:
499
+ logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
500
+ return pd.DataFrame()
501
+
502
+ def _fetch_redis_data_bak(self, cache_key: str, months: list = None) -> pd.DataFrame:
425
503
  try:
426
504
  if months is not None:
427
505
  fields = months.copy()
@@ -435,15 +513,29 @@ class RedisDataHash(object):
435
513
  dfs.append(df)
436
514
  return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
437
515
  else:
438
- data_dict = self.redis_engine.hgetall(cache_key)
516
+ # 优化分片数据获取
517
+ cursor, data = self.redis_engine.hscan(cache_key, match="all_*")
439
518
  dfs = []
440
- for field, data in data_dict.items():
441
- try:
442
- df = pd.DataFrame(json.loads(data.decode("utf-8")))
443
- df = self._convert_date_columns(df)
444
- dfs.append(df)
445
- except Exception as e:
446
- logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
519
+ while True:
520
+ for field, value in data.items():
521
+ try:
522
+ df = pd.DataFrame(json.loads(value))
523
+ dfs.append(self._convert_date_columns(df))
524
+ except Exception as e:
525
+ logger.error(f"分片解析失败 {field}: {e}")
526
+ if cursor == 0:
527
+ break
528
+ cursor, data = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
529
+ return pd.concat(dfs) if dfs else pd.DataFrame()
530
+ # data_dict = self.redis_engine.hgetall(cache_key)
531
+ # dfs = []
532
+ # for field, data in data_dict.items():
533
+ # try:
534
+ # df = pd.DataFrame(json.loads(data.decode("utf-8")))
535
+ # df = self._convert_date_columns(df)
536
+ # dfs.append(df)
537
+ # except Exception as e:
538
+ # logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
447
539
  return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
448
540
  except Exception as e:
449
541
  logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 3.6.10
3
+ Version: 3.6.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -34,11 +34,11 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
34
34
  mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
35
35
  mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
36
36
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
37
- mdbq/redis/getredis.py,sha256=pBgRyUrRmOlW-oXry3Hat9GahZgljvidNEDZJFn-geU,23932
37
+ mdbq/redis/getredis.py,sha256=QAiqkxgrQf6AHgWQdIKah3FKkM5HE8TqwJdTXrlyR6c,28427
38
38
  mdbq/redis/getredis_优化hash.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
39
39
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
40
40
  mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
41
- mdbq-3.6.10.dist-info/METADATA,sha256=D8ooXZMsVBNM_wbcXjE4xq2wHJU200gXHbEPkRpKioA,244
42
- mdbq-3.6.10.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
43
- mdbq-3.6.10.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
44
- mdbq-3.6.10.dist-info/RECORD,,
41
+ mdbq-3.6.11.dist-info/METADATA,sha256=nf9h8l9QqT6ZrZ-J4cassVWcqRi2r3Oicu9eicCtCaA,244
42
+ mdbq-3.6.11.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
43
+ mdbq-3.6.11.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
44
+ mdbq-3.6.11.dist-info/RECORD,,
File without changes