mdbq 3.6.10-py3-none-any.whl → 3.6.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/redis/getredis.py
CHANGED
@@ -343,7 +343,6 @@ class RedisDataHash(object):
|
|
343
343
|
end_month = end_dt.to_period('M')
|
344
344
|
months = pd.period_range(start_month, end_month, freq='M').strftime("%Y%m").tolist()
|
345
345
|
cache_data = self._fetch_redis_data(cache_key, months)
|
346
|
-
|
347
346
|
if cache_data.empty:
|
348
347
|
self._trigger_async_cache_update(
|
349
348
|
cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
|
@@ -386,10 +385,26 @@ class RedisDataHash(object):
|
|
386
385
|
combined_data = self._merge_data(new_data, existing_data)
|
387
386
|
|
388
387
|
if not combined_data.empty:
|
389
|
-
if '日期' not in combined_data.columns
|
390
|
-
|
391
|
-
|
392
|
-
|
388
|
+
if '日期' not in combined_data.columns:
|
389
|
+
# 原子化删除旧分片
|
390
|
+
# 优化分片存储性能
|
391
|
+
chunk_size = 5000
|
392
|
+
with self.redis_engine.pipeline(transaction=False) as pipe:
|
393
|
+
# 批量删除旧分片
|
394
|
+
for key in self.redis_engine.hscan_iter(cache_key, match="all_*"):
|
395
|
+
pipe.hdel(cache_key, key[0])
|
396
|
+
|
397
|
+
# 批量写入新分片
|
398
|
+
for idx in range(0, len(combined_data), chunk_size):
|
399
|
+
chunk = combined_data.iloc[idx:idx + chunk_size]
|
400
|
+
chunk_key = f"all_{idx // chunk_size:04d}"
|
401
|
+
pipe.hset(cache_key, chunk_key, self._serialize_data(chunk))
|
402
|
+
|
403
|
+
pipe.expire(cache_key, self.cache_ttl)
|
404
|
+
pipe.execute()
|
405
|
+
# serialized_data = self._serialize_data(combined_data)
|
406
|
+
# self.redis_engine.hset(cache_key, "all", serialized_data)
|
407
|
+
# self.redis_engine.expire(cache_key, self.cache_ttl)
|
393
408
|
else:
|
394
409
|
# 按月分片存储
|
395
410
|
combined_data['month'] = combined_data['日期'].dt.to_period('M').dt.strftime("%Y%m")
|
@@ -422,6 +437,69 @@ class RedisDataHash(object):
|
|
422
437
|
return pd.DataFrame()
|
423
438
|
|
424
439
|
def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
|
440
|
+
try:
|
441
|
+
dfs = []
|
442
|
+
|
443
|
+
if months is not None:
|
444
|
+
# 1. 获取指定月份数据
|
445
|
+
month_fields = months.copy()
|
446
|
+
month_data = self.redis_engine.hmget(cache_key, month_fields)
|
447
|
+
|
448
|
+
# 处理月份数据
|
449
|
+
for data, field in zip(month_data, month_fields):
|
450
|
+
if data:
|
451
|
+
try:
|
452
|
+
df = pd.DataFrame(json.loads(data.decode("utf-8")))
|
453
|
+
df = self._convert_date_columns(df)
|
454
|
+
dfs.append(df)
|
455
|
+
except Exception as e:
|
456
|
+
logger.error(f"月份数据解析失败 {field}: {e}")
|
457
|
+
|
458
|
+
# 2. 获取所有分片数据
|
459
|
+
# 优化分片数据获取
|
460
|
+
pipeline = self.redis_engine.pipeline()
|
461
|
+
cursor, keys = self.redis_engine.hscan(cache_key, match="all_*")
|
462
|
+
while True:
|
463
|
+
for key in keys:
|
464
|
+
pipeline.hget(cache_key, key)
|
465
|
+
if cursor == 0:
|
466
|
+
break
|
467
|
+
cursor, keys = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
|
468
|
+
shard_values = pipeline.execute()
|
469
|
+
|
470
|
+
# 处理分片数据
|
471
|
+
for value in shard_values:
|
472
|
+
if value:
|
473
|
+
try:
|
474
|
+
df = pd.DataFrame(json.loads(value.decode("utf-8")))
|
475
|
+
dfs.append(self._convert_date_columns(df))
|
476
|
+
except Exception as e:
|
477
|
+
logger.error(f"分片数据解析失败: {e}")
|
478
|
+
|
479
|
+
else:
|
480
|
+
# 原有全量获取逻辑保持不变
|
481
|
+
data_dict = self.redis_engine.hgetall(cache_key)
|
482
|
+
for field, data in data_dict.items():
|
483
|
+
try:
|
484
|
+
df = pd.DataFrame(json.loads(data.decode("utf-8")))
|
485
|
+
df = self._convert_date_columns(df)
|
486
|
+
dfs.append(df)
|
487
|
+
except Exception as e:
|
488
|
+
logger.error(f"Redis 数据解析失败 {field.decode()}: {e}")
|
489
|
+
|
490
|
+
# 统一合并和排序处理
|
491
|
+
if dfs:
|
492
|
+
final_df = pd.concat(dfs, ignore_index=True)
|
493
|
+
if '日期' in final_df.columns:
|
494
|
+
final_df = final_df.sort_values('日期', ascending=False)
|
495
|
+
return final_df
|
496
|
+
return pd.DataFrame()
|
497
|
+
|
498
|
+
except Exception as e:
|
499
|
+
logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
|
500
|
+
return pd.DataFrame()
|
501
|
+
|
502
|
+
def _fetch_redis_data_bak(self, cache_key: str, months: list = None) -> pd.DataFrame:
|
425
503
|
try:
|
426
504
|
if months is not None:
|
427
505
|
fields = months.copy()
|
@@ -435,15 +513,29 @@ class RedisDataHash(object):
|
|
435
513
|
dfs.append(df)
|
436
514
|
return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
|
437
515
|
else:
|
438
|
-
|
516
|
+
# 优化分片数据获取
|
517
|
+
cursor, data = self.redis_engine.hscan(cache_key, match="all_*")
|
439
518
|
dfs = []
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
519
|
+
while True:
|
520
|
+
for field, value in data.items():
|
521
|
+
try:
|
522
|
+
df = pd.DataFrame(json.loads(value))
|
523
|
+
dfs.append(self._convert_date_columns(df))
|
524
|
+
except Exception as e:
|
525
|
+
logger.error(f"分片解析失败 {field}: {e}")
|
526
|
+
if cursor == 0:
|
527
|
+
break
|
528
|
+
cursor, data = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
|
529
|
+
return pd.concat(dfs) if dfs else pd.DataFrame()
|
530
|
+
# data_dict = self.redis_engine.hgetall(cache_key)
|
531
|
+
# dfs = []
|
532
|
+
# for field, data in data_dict.items():
|
533
|
+
# try:
|
534
|
+
# df = pd.DataFrame(json.loads(data.decode("utf-8")))
|
535
|
+
# df = self._convert_date_columns(df)
|
536
|
+
# dfs.append(df)
|
537
|
+
# except Exception as e:
|
538
|
+
# logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
|
447
539
|
return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
|
448
540
|
except Exception as e:
|
449
541
|
logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
|
@@ -34,11 +34,11 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
|
|
34
34
|
mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
35
35
|
mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
|
36
36
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
37
|
-
mdbq/redis/getredis.py,sha256=
|
37
|
+
mdbq/redis/getredis.py,sha256=QAiqkxgrQf6AHgWQdIKah3FKkM5HE8TqwJdTXrlyR6c,28427
|
38
38
|
mdbq/redis/getredis_优化hash.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
|
39
39
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
40
40
|
mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
|
41
|
-
mdbq-3.6.
|
42
|
-
mdbq-3.6.
|
43
|
-
mdbq-3.6.
|
44
|
-
mdbq-3.6.
|
41
|
+
mdbq-3.6.11.dist-info/METADATA,sha256=nf9h8l9QqT6ZrZ-J4cassVWcqRi2r3Oicu9eicCtCaA,244
|
42
|
+
mdbq-3.6.11.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
43
|
+
mdbq-3.6.11.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
44
|
+
mdbq-3.6.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|