mdbq 3.6.9__tar.gz → 3.6.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {mdbq-3.6.9 → mdbq-3.6.10}/PKG-INFO +1 -1
  2. mdbq-3.6.10/mdbq/redis/getredis.py +605 -0
  3. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq.egg-info/PKG-INFO +1 -1
  4. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq.egg-info/SOURCES.txt +1 -0
  5. {mdbq-3.6.9 → mdbq-3.6.10}/setup.py +1 -1
  6. {mdbq-3.6.9 → mdbq-3.6.10}/README.txt +0 -0
  7. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/__init__.py +0 -0
  8. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/__version__.py +0 -0
  9. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/aggregation/__init__.py +0 -0
  10. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/aggregation/aggregation.py +0 -0
  11. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/aggregation/datashow.py +0 -0
  12. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/aggregation/optimize_data.py +0 -0
  13. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/aggregation/query_data.py +0 -0
  14. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/bdup/__init__.py +0 -0
  15. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/bdup/bdup.py +0 -0
  16. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/config/__init__.py +0 -0
  17. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/config/myconfig.py +0 -0
  18. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/config/products.py +0 -0
  19. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/config/set_support.py +0 -0
  20. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/dataframe/__init__.py +0 -0
  21. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/dataframe/converter.py +0 -0
  22. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/log/__init__.py +0 -0
  23. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/log/mylogger.py +0 -0
  24. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mongo/__init__.py +0 -0
  25. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mongo/mongo.py +0 -0
  26. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mysql/__init__.py +0 -0
  27. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mysql/mysql.py +0 -0
  28. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mysql/mysql_bak.py +0 -0
  29. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mysql/recheck_mysql.py +0 -0
  30. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mysql/s_query.py +0 -0
  31. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/mysql/year_month_day.py +0 -0
  32. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/other/__init__.py +0 -0
  33. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/other/download_sku_picture.py +0 -0
  34. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/other/porxy.py +0 -0
  35. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/other/pov_city.py +0 -0
  36. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/other/sku_picture.py +0 -0
  37. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/other/ua_sj.py +0 -0
  38. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/pbix/__init__.py +0 -0
  39. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/pbix/pbix_refresh.py +0 -0
  40. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/pbix/refresh_all.py +0 -0
  41. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/pbix/refresh_all_old.py +0 -0
  42. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/redis/__init__.py +0 -0
  43. /mdbq-3.6.9/mdbq/redis/getredis.py → /mdbq-3.6.10/mdbq/redis/getredis_优化hash.py +0 -0
  44. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/spider/__init__.py +0 -0
  45. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq/spider/aikucun.py +0 -0
  46. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq.egg-info/dependency_links.txt +0 -0
  47. {mdbq-3.6.9 → mdbq-3.6.10}/mdbq.egg-info/top_level.txt +0 -0
  48. {mdbq-3.6.9 → mdbq-3.6.10}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 3.6.9
+ Version: 3.6.10
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -0,0 +1,605 @@
+ # -*- coding: UTF-8 -*-
+ import os.path
+ import redis
+ import socket
+ from mdbq.mysql import s_query
+ from mdbq.config import myconfig
+ import pandas as pd
+ import numpy as np  # used by decimal_serializer (np.generic) below
+ import json
+ import datetime
+ import threading
+ import logging
+ from logging.handlers import RotatingFileHandler
+ import getpass
+ import platform
+ from decimal import Decimal
+
+ if platform.system() == 'Windows':
+     D_PATH = os.path.join(f'C:\\Users\\{getpass.getuser()}\\Downloads')
+ else:
+     D_PATH = os.path.join(f'/Users/{getpass.getuser()}/Downloads')
+
+
+ if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
+     conf = myconfig.main()
+     conf_data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password']
+ elif socket.gethostname() == 'MacBookPro':
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password']
+ else:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password']  # Redis uses local data, identical on every machine
+
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
+
+ # Logger for the current module
+ logger = logging.getLogger(__name__)
+
+ # File handler that writes log records to disk
+ # file_handler = logging.FileHandler(os.path.join(D_PATH, 'logfile', 'redis.log'))
+ if not os.path.isdir(os.path.join(D_PATH, 'logfile')):
+     os.makedirs(os.path.join(D_PATH, 'logfile'))
+ log_file = os.path.join(D_PATH, 'logfile', 'redis.log')
+ file_handler = RotatingFileHandler(log_file, maxBytes=3 * 1024 * 1024, backupCount=10)  # keep 10 backup files
+ file_handler.setLevel(logging.INFO)  # log level for the file handler
+
+ # Formatter, attached to the file handler
+ formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+ file_handler.setFormatter(formatter)
+
+ # Add the file handler to the logger
+ logger.addHandler(file_handler)
+
+
+ class RedisData(object):
+     """
+     Stores values as Redis strings.
+     """
+     def __init__(self, redis_engine, download, cache_ttl: int):
+         self.redis_engine = redis_engine  # Redis client
+         self.download = download  # MySQL query engine
+         self.cache_ttl = cache_ttl * 60  # cache TTL in seconds
+
+     def get_from_mysql(
+             self,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date,
+             end_date
+     ) -> pd.DataFrame:
+         """
+         Read data from MySQL and return a DataFrame
+
+         Args:
+             set_year: whether the table name carries a year suffix
+         """
+         dfs = []
+         if set_year:
+             current_year = datetime.datetime.today().year
+             for year in range(2024, current_year + 1):
+                 df = self._fetch_table_data(
+                     db_name, f"{table_name}_{year}", start_date, end_date
+                 )
+                 if df is not None:
+                     dfs.append(df)
+         else:
+             df = self._fetch_table_data(db_name, table_name, start_date, end_date)
+             if df is not None:
+                 dfs.append(df)
+
+         combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+         if combined_df.empty:
+             logger.info(f"Warning: no data read from {db_name}.{table_name}")
+         else:
+             combined_df = self._convert_date_columns(combined_df)
+         return combined_df
+
+     def get_from_redis(
+             self,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date,
+             end_date
+     ) -> pd.DataFrame:
+         """
+         Fetch data from Redis; trigger an async refresh when the cache is stale or incomplete
+         """
+         start_dt = pd.to_datetime(start_date)
+         end_dt = pd.to_datetime(end_date)
+         cache_key = self._generate_cache_key(db_name, table_name, set_year)
+
+         # Probe the cache metadata
+         try:
+             ttl = self.redis_engine.ttl(cache_key)
+             cache_data = self._fetch_redis_data(cache_key)
+         except Exception as e:
+             logger.info(f"Redis connection error: {e}; falling back to MySQL")
+             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+         # Cache-miss / near-expiry handling
+         if ttl < 60 or cache_data.empty:
+             self._trigger_async_cache_update(
+                 cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+             )
+             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+         # Serve from the valid cache
+         filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
+         if not filtered_df.empty:
+             return filtered_df
+
+         # Cached data does not cover the requested range
+         self._trigger_async_cache_update(
+             cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+         )
+         return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+     def set_redis(
+             self,
+             cache_key: str,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date,
+             end_date,
+             existing_data: pd.DataFrame
+     ) -> pd.DataFrame:
+         """
+         Asynchronously refresh the Redis cache, merging new and existing data
+         """
+         try:
+             # Pull fresh data from MySQL
+             new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+             if new_data.empty:
+                 return pd.DataFrame()
+
+             # Merge with historical data
+             combined_data = self._merge_data(new_data, existing_data)
+
+             # Serialize and store in Redis
+             serialized_data = self._serialize_data(combined_data)
+             self.redis_engine.set(cache_key, serialized_data)
+             self.redis_engine.expire(cache_key, self.cache_ttl)
+
+             logger.info(f"Cache updated {cache_key} | rows: {len(combined_data)}")
+             return combined_data
+
+         except Exception as e:
+             logger.info(f"Cache update failed: {cache_key} - {str(e)}")
+             return pd.DataFrame()
+
+     # Helper Methods ------------------------------------------------
+
+     def _fetch_table_data(
+             self,
+             db_name: str,
+             table_name: str,
+             start_date,
+             end_date
+     ) -> pd.DataFrame:
+         """Wrap the MySQL fetch logic"""
+         try:
+             return self.download.data_to_df(
+                 db_name=db_name,
+                 table_name=table_name,
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection={}
+             )
+         except Exception as e:
+             logger.info(f"MySQL query error {db_name}.{table_name}: {e}")
+             return pd.DataFrame()
+
+     def _fetch_redis_data(self, cache_key: str) -> pd.DataFrame:
+         """Fetch and parse data from Redis (date columns converted automatically)"""
+         try:
+             data = self.redis_engine.get(cache_key)
+             if not data:
+                 return pd.DataFrame()
+             # Deserialize
+             df = pd.DataFrame(json.loads(data.decode("utf-8")))
+             return self._convert_date_columns(df)
+         except Exception as e:
+             logger.info(f"Failed to parse Redis data {cache_key}: {e}")
+             return pd.DataFrame()
+
+     def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+         """Normalize date-column conversion"""
+         if "日期" in df.columns:
+             df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
+         return df
+
+     def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
+         """Build a normalized cache key"""
+         return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
+
+     def _filter_by_date_range(
+             self,
+             df: pd.DataFrame,
+             start_dt: datetime.datetime,
+             end_dt: datetime.datetime
+     ) -> pd.DataFrame:
+         """Filter rows by date range"""
+         if "日期" not in df.columns:
+             return df
+         date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
+         return df[date_mask].copy()
+
+     def _trigger_async_cache_update(
+             self,
+             cache_key: str,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date: str,
+             end_date: str,
+             existing_data: pd.DataFrame
+     ):
+         """Spawn a daemon thread to refresh the cache"""
+         thread = threading.Thread(
+             target=self.set_redis,
+             args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
+             daemon=True
+         )
+         thread.start()
+
+     def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
+         """Merge new and existing datasets"""
+         if existing_data.empty or "日期" not in existing_data.columns:
+             return new_data
+
+         new_min = new_data["日期"].min()
+         new_max = new_data["日期"].max()
+         valid_historical = existing_data[
+             (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
+         ]
+         return pd.concat([new_data, valid_historical], ignore_index=True).drop_duplicates(subset=["日期"])
+
+     def _serialize_data(self, df: pd.DataFrame) -> str:
+         """Serialize the DataFrame, converting date dtypes to strings"""
+         temp_df = df.copy()
+         date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
+         for col in date_cols:
+             temp_df[col] = temp_df[col].dt.strftime("%Y-%m-%d")
+         return temp_df.to_json(orient="records", force_ascii=False)
+
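RedisData keeps an entire table under one string key: _serialize_data emits a JSON records array via to_json, and _fetch_redis_data rebuilds the DataFrame with json.loads. A minimal, self-contained sketch of that round trip (the 日期 column name mirrors the code above; the sample values are made up):

    import json
    import pandas as pd

    df = pd.DataFrame({"日期": ["2025-01-01", "2025-01-02"], "value": [1, 2]})
    df["日期"] = pd.to_datetime(df["日期"])
    # Write side: dates to strings, then a JSON records array
    payload = df.copy()
    payload["日期"] = payload["日期"].dt.strftime("%Y-%m-%d")
    blob = payload.to_json(orient="records", force_ascii=False)
    # Read side: JSON back to a DataFrame, dates re-parsed
    restored = pd.DataFrame(json.loads(blob))
    restored["日期"] = pd.to_datetime(restored["日期"], format="%Y-%m-%d", errors="coerce")
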
+ class RedisDataHash(object):
+     """
+     Stores values as Redis hashes.
+     Combined Redis-cache / MySQL query processor.
+
+     Features:
+     - Supports MySQL tables sharded by year
+     - Multi-level cache strategy (in-memory + Redis)
+     - Asynchronous cache refresh
+     - Automatic date-range and data-type handling
+     """
+
+     def __init__(self, redis_engine, download, cache_ttl: int):
+         self.redis_engine = redis_engine
+         self.download = download
+         self.cache_ttl = cache_ttl * 60  # stored in seconds
+
+     def get_from_mysql(
+             self,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date,
+             end_date
+     ) -> pd.DataFrame:
+         dfs = []
+         if set_year:
+             current_year = datetime.datetime.today().year
+             for year in range(2024, current_year + 1):
+                 df = self._fetch_table_data(
+                     db_name, f"{table_name}_{year}", start_date, end_date
+                 )
+                 if df is not None:
+                     dfs.append(df)
+         else:
+             df = self._fetch_table_data(db_name, table_name, start_date, end_date)
+             if df is not None:
+                 dfs.append(df)
+
+         combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+         if combined_df.empty:
+             logger.warning(f"Warning: no data read from {db_name}.{table_name}")
+         else:
+             combined_df = self._convert_date_columns(combined_df)
+         return combined_df
+
+     def get_from_redis(
+             self,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date,
+             end_date
+     ) -> pd.DataFrame:
+         start_dt = pd.to_datetime(start_date).floor('D')
+         end_dt = pd.to_datetime(end_date).floor('D')
+         cache_key = self._generate_cache_key(db_name, table_name, set_year)
+
+         try:
+             ttl = self.redis_engine.ttl(cache_key)
+             if ttl < 60:
+                 cache_data = self._fetch_redis_data(cache_key)
+                 self._trigger_async_cache_update(
+                     cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+                 )
+                 return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+             # Build the list of months covered by the query
+             start_month = start_dt.to_period('M')
+             end_month = end_dt.to_period('M')
+             months = pd.period_range(start_month, end_month, freq='M').strftime("%Y%m").tolist()
+             cache_data = self._fetch_redis_data(cache_key, months)
+
+             if cache_data.empty:
+                 self._trigger_async_cache_update(
+                     cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+                 )
+                 return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+             filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
+             if not filtered_df.empty:
+                 if '日期' in filtered_df.columns.tolist():
+                     exist_min_date = filtered_df['日期'].min()
+                     if exist_min_date <= start_dt:
+                         return filtered_df
+                 else:
+                     return filtered_df
+
+             self._trigger_async_cache_update(
+                 cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+             )
+             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+         except Exception as e:
+             logger.error(f"Redis connection error: {e}; falling back to MySQL")
+             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+     def set_redis(
+             self,
+             cache_key: str,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date,
+             end_date,
+             existing_data: pd.DataFrame
+     ) -> None:
+         try:
+             new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+             if new_data.empty:
+                 return
+
+             combined_data = self._merge_data(new_data, existing_data)
+
+             if not combined_data.empty:
+                 if '日期' not in combined_data.columns.tolist():
+                     serialized_data = self._serialize_data(combined_data)
+                     self.redis_engine.hset(cache_key, "all", serialized_data)
+                     self.redis_engine.expire(cache_key, self.cache_ttl)
+                 else:
+                     # Shard the hash by month
+                     combined_data['month'] = combined_data['日期'].dt.to_period('M').dt.strftime("%Y%m")
+                     for month_str, group in combined_data.groupby('month'):
+                         group = group.drop(columns=['month'])
+                         serialized_data = self._serialize_data(group)
+                         self.redis_engine.hset(cache_key, month_str, serialized_data)
+                     self.redis_engine.expire(cache_key, self.cache_ttl)
+                 logger.info(f"Cache updated {cache_key} | rows: {len(combined_data)}")
+         except Exception as e:
+             logger.error(f"Cache update failed: {cache_key} - {str(e)}")
+
+     def _fetch_table_data(
+             self,
+             db_name: str,
+             table_name: str,
+             start_date,
+             end_date
+     ) -> pd.DataFrame:
+         try:
+             return self.download.data_to_df(
+                 db_name=db_name,
+                 table_name=table_name,
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection={}
+             )
+         except Exception as e:
+             logger.error(f"MySQL query error {db_name}.{table_name}: {e}")
+             return pd.DataFrame()
+
+     def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
+         try:
+             if months is not None:
+                 fields = months.copy()
+                 fields.append('all')
+                 data_list = self.redis_engine.hmget(cache_key, fields)
+                 dfs = []
+                 for data, field in zip(data_list, fields):
+                     if data:
+                         df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                         df = self._convert_date_columns(df)
+                         dfs.append(df)
+                 return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+             else:
+                 data_dict = self.redis_engine.hgetall(cache_key)
+                 dfs = []
+                 for field, data in data_dict.items():
+                     try:
+                         df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                         df = self._convert_date_columns(df)
+                         dfs.append(df)
+                     except Exception as e:
+                         logger.error(f"Failed to parse Redis data {cache_key}, field {field}: {e}")
+                 return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+         except Exception as e:
+             logger.error(f"Failed to fetch Redis data {cache_key}: {e}")
+             return pd.DataFrame()
+
+     def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+         if "日期" in df.columns:
+             df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
+         return df
+
+     def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
+         return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
+
+     def _filter_by_date_range(
+             self,
+             df: pd.DataFrame,
+             start_dt: datetime.datetime,
+             end_dt: datetime.datetime
+     ) -> pd.DataFrame:
+         if "日期" not in df.columns:
+             return df
+         date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
+         return df[date_mask].copy()
+
+     def _trigger_async_cache_update(
+             self,
+             cache_key: str,
+             db_name: str,
+             table_name: str,
+             set_year: bool,
+             start_date: str,
+             end_date: str,
+             existing_data: pd.DataFrame
+     ):
+         thread = threading.Thread(
+             target=self.set_redis,
+             args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
+             daemon=True
+         )
+         thread.start()
+
+     def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
+         if existing_data.empty or "日期" not in existing_data.columns:
+             return new_data
+         new_data["日期"] = pd.to_datetime(new_data["日期"])
+         existing_data["日期"] = pd.to_datetime(existing_data["日期"])
+
+         new_min = new_data["日期"].min()
+         new_max = new_data["日期"].max()
+
+         valid_historical = existing_data[
+             (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
+         ]
+         merged_data = pd.concat([new_data, valid_historical], ignore_index=True)
+         merged_data.sort_values(['日期'], ascending=[False], ignore_index=True, inplace=True)
+         return merged_data
+
+     def _serialize_data(self, df: pd.DataFrame) -> bytes:
+         if df.empty:
+             return json.dumps([], ensure_ascii=False).encode("utf-8")
+         temp_df = df.copy()
+
+         date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
+         for col in date_cols:
+             if temp_df[col].isna().all():
+                 # An all-NaT column cannot go through .dt.strftime; leave it as
+                 # object dtype and let safe_null_convert map NaT to None below
+                 temp_df[col] = temp_df[col].astype(object)
+                 continue
+             temp_df[col] = (
+                 temp_df[col]
+                 .dt.strftime("%Y-%m-%d")
+                 .where(temp_df[col].notna(), None)
+             )
+
+         def safe_null_convert(series):
+             # Map NaN/NaT to None so json.dumps emits null
+             if series.isna().all():
+                 return series.astype(object).where(pd.notnull(series), None)
+             return series.where(pd.notnull(series), None)
+
+         temp_df = temp_df.apply(safe_null_convert)
+
+         def decimal_serializer(obj):
+             # JSON fallback for types json.dumps cannot handle natively
+             if obj is None:
+                 return None
+             if isinstance(obj, Decimal):
+                 return round(float(obj), 6)
+             elif isinstance(obj, pd.Timestamp):
+                 return obj.strftime("%Y-%m-%d %H:%M:%S")
+             elif isinstance(obj, np.generic):
+                 return obj.item()
+             elif isinstance(obj, (datetime.date, datetime.datetime)):
+                 return obj.isoformat()
+             elif isinstance(obj, (list, tuple, set)):
+                 return [decimal_serializer(item) for item in obj]
+             elif isinstance(obj, dict):
+                 return {decimal_serializer(k): decimal_serializer(v) for k, v in obj.items()}
+             elif isinstance(obj, bytes):
+                 return obj.decode("utf-8", errors="replace")
+             elif isinstance(obj, pd.Series):
+                 return obj.to_list()
+             else:
+                 try:
+                     json.dumps(obj)
+                     return obj
+                 except TypeError:
+                     logger.error(f"Unserializable type {type(obj)}: {str(obj)}")
+                     raise
+
+         try:
+             data_records = temp_df.to_dict(orient="records")
+         except Exception as e:
+             logger.error(f"Failed to convert data to records: {str(e)}")
+             raise
+
+         if not data_records:
+             return json.dumps([], ensure_ascii=False).encode("utf-8")
+
+         try:
+             return json.dumps(
+                 data_records,
+                 ensure_ascii=False,
+                 default=decimal_serializer
+             ).encode("utf-8")
+         except TypeError as e:
+             logger.error(f"Serialization failed; check for unhandled data types: {str(e)}")
+             raise
+
+
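RedisDataHash shards one hash key by month: set_redis writes each month's rows to a YYYYMM field with HSET, and get_from_redis reads only the fields a query needs with HMGET. A sketch of the field arithmetic and the read path, assuming a locally reachable Redis; the key reuses the database/table names from the example in __main__ below:

    import pandas as pd
    import redis

    r = redis.Redis(host="127.0.0.1", port=6379, db=0)
    start_dt = pd.to_datetime("2025-01-15").floor("D")
    end_dt = pd.to_datetime("2025-03-02").floor("D")
    # The same month-range computation used in get_from_redis
    months = pd.period_range(
        start_dt.to_period("M"), end_dt.to_period("M"), freq="M"
    ).strftime("%Y%m").tolist()
    # months == ['202501', '202502', '202503']
    fields = months + ["all"]  # "all" holds tables without a 日期 column
    blobs = r.hmget("聚合数据:多店推广场景_按日聚合", fields)  # one JSON blob (or None) per field
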
+ if __name__ == '__main__':
+     # # ****************************************************
+     # # This part is defined externally and only needs to be set up once -- begin
+     # redis_config = {
+     #     'host': '127.0.0.1',
+     #     'port': 6379,  # default Redis port
+     #     'db': 0,  # default Redis database index
+     #     # 'username': 'default',
+     #     'password': redis_password,
+     # }
+     # # Instantiate the Redis client
+     # r = redis.Redis(**redis_config)
+     # # Instantiate the MySQL query engine
+     # d = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+     # # Hand both instances to the RedisData class to build the data engine
+     # m = RedisData(redis_engine=r, download=d, cache_ttl=60)  # cache for 60 minutes
+     # # ****************************************************
+     #
+     # # Fetch data dynamically:
+     # db_name = '聚合数据'
+     # table_name = '多店推广场景_按日聚合'
+     # set_year = False
+     # df = m.get_from_redis(
+     #     db_name=db_name,
+     #     table_name=table_name,
+     #     set_year=set_year,
+     #     start_date='2025-01-01',
+     #     end_date='2025-01-31'
+     # )
+     # logger.info(df)
+     #
+
+     logger.info(socket.gethostname())
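The commented-out block above wires up the string-based RedisData; the new hash-based class is constructed the same way. A hedged end-to-end sketch (the credentials are placeholders, since the module itself pulls them from myconfig, and cache_ttl=60 is an assumption carried over from the example above):

    import redis
    from mdbq.mysql import s_query
    from mdbq.redis.getredis import RedisDataHash

    r = redis.Redis(host='127.0.0.1', port=6379, db=0, password='...')
    d = s_query.QueryDatas(username='user', password='...', host='localhost', port=3306)
    m = RedisDataHash(redis_engine=r, download=d, cache_ttl=60)  # cache for 60 minutes
    df = m.get_from_redis(
        db_name='聚合数据',
        table_name='多店推广场景_按日聚合',
        set_year=False,
        start_date='2025-01-01',
        end_date='2025-01-31',
    )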
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 3.6.9
+ Version: 3.6.10
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -41,5 +41,6 @@ mdbq/pbix/refresh_all.py
  mdbq/pbix/refresh_all_old.py
  mdbq/redis/__init__.py
  mdbq/redis/getredis.py
+ mdbq/redis/getredis_优化hash.py
  mdbq/spider/__init__.py
  mdbq/spider/aikucun.py
@@ -3,7 +3,7 @@
  from setuptools import setup, find_packages
 
  setup(name='mdbq',
-       version='3.6.9',
+       version='3.6.10',
        author='xigua, ',
        author_email="2587125111@qq.com",
        url='https://pypi.org/project/mdbq',