mdbq 3.6.2__py3-none-any.whl → 3.6.4__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
mdbq/redis/getredis.py CHANGED
@@ -1,4 +1,5 @@
  # -*- coding: UTF-8 –*-
+ import os.path
  import redis
  import socket
  from mdbq.mysql import s_query
@@ -7,6 +8,7 @@ import pandas as pd
  import json
  import datetime
  import threading
+ import logging

 
  if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
@@ -14,166 +16,276 @@ if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
      conf_data = conf['Windows']['company']['mysql']['local']
      username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
      redis_password = conf['Windows']['company']['redis']['local']['password']
+ elif socket.gethostname() == 'MacBookPro':
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password']
  else:
      conf = myconfig.main()
      conf_data = conf['Windows']['xigua_lx']['mysql']['local']
      username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
      redis_password = conf['Windows']['company']['redis']['local']['password'] # redis 使用本地数据,全部机子相同

+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
+
+ # 获取当前模块的日志记录器
+ logger = logging.getLogger(__name__)
+
+ # 创建一个文件处理器,用于将日志写入文件
+ file_handler = logging.FileHandler(os.path.join('/Users/xigua/Downloads', 'redis.log'))
+ file_handler.setLevel(logging.INFO) # 设置文件处理器的日志级别
+
+ # 创建一个日志格式器,并设置给文件处理器
+ formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+ file_handler.setFormatter(formatter)
+
+ # 将文件处理器添加到日志记录器
+ logger.addHandler(file_handler)


  class RedisData(object):
-     def __init__(self, redis_engin, download):
-         self.redis_engin = redis_engin # redis 数据处理引擎
-         self.download = download # mysql 数据处理引擎
-         self.minute = 60 # 缓存过期时间: 分钟
+     def __init__(self, redis_engine, download, cache_ttl: int):
+         self.redis_engine = redis_engine # Redis 数据处理引擎
+         self.download = download # MySQL 数据处理引擎
+         self.cache_ttl = cache_ttl * 60 # 缓存过期时间(秒)

-     def get_from_mysql(self, _db_name, _table_name, _set_year, start_date, end_date):
+     def get_from_mysql(
+         self,
+         db_name: str,
+         table_name: str,
+         set_year: bool,
+         start_date,
+         end_date
+     ) -> pd.DataFrame:
          """
-         _set_year: _table_name 中是否含有年份
+         MySQL 读取数据并返回 DataFrame
+
+         Args:
+             set_year: 表名是否包含年份后缀
          """
-         if _set_year:
-             __res = []
-             for year in range(2024, datetime.datetime.today().year + 1):
-                 _df = self.download.data_to_df(
-                     db_name=_db_name,
-                     table_name=f'{_table_name}_{year}',
-                     start_date=start_date,
-                     end_date=end_date,
-                     projection={},
+         dfs = []
+         if set_year:
+             current_year = datetime.datetime.today().year
+             for year in range(2024, current_year + 1):
+                 df = self._fetch_table_data(
+                     db_name, f"{table_name}_{year}", start_date, end_date
                  )
-                 __res.append(_df)
-             _df = pd.concat(__res, ignore_index=True)
+                 if df is not None:
+                     dfs.append(df)
          else:
-             _df = self.download.data_to_df(
-                 db_name=_db_name,
-                 table_name=_table_name,
-                 start_date=start_date,
-                 end_date=end_date,
-                 projection={},
-             )
-         if len(_df) == 0:
-             print(f'{_db_name} - {_table_name}: mysql读取的数据不能为空')
-             return pd.DataFrame()
-         if '日期' in _df.columns.tolist():
-             _df['日期'] = pd.to_datetime(_df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
-         return _df
+             df = self._fetch_table_data(db_name, table_name, start_date, end_date)
+             if df is not None:
+                 dfs.append(df)

-     def get_from_redis(self, _db_name, _table_name, _set_year, start_date, end_date):
+         combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+         if combined_df.empty:
+             logger.info(f"警告: {db_name}.{table_name} 未读取到数据")
+         else:
+             combined_df = self._convert_date_columns(combined_df)
+         return combined_df
+
+     def get_from_redis(
+         self,
+         db_name: str,
+         table_name: str,
+         set_year: bool,
+         start_date,
+         end_date
+     ) -> pd.DataFrame:
          """
-         _set_year: _table_name 中是否含有年份
-         _col_list: 如果不传就取 table 的所有列
-         对于日期: 最终传出的是日期格式,但如果存入 redis ,需要先格式化为 str,避免日期变整数形式
+         Redis 获取数据,若缓存过期/不完整则触发异步更新
          """
-         start_date = pd.to_datetime(start_date)
-         end_date = pd.to_datetime(end_date)
-         if _set_year:
-             my_key = f'{_db_name}:{_table_name}_haveyear'
-         else:
-             my_key = f'{_db_name}:{_table_name}'
-         # ttl 对于不存在的键,它返回 -2;而对于没有设置过期时间的键,它返回 -1
+         start_dt = pd.to_datetime(start_date)
+         end_dt = pd.to_datetime(end_date)
+         cache_key = self._generate_cache_key(db_name, table_name, set_year)
+
+         # 尝试获取缓存元数据
          try:
-             ttl_result = self.redis_engin.ttl(my_key)
+             ttl = self.redis_engine.ttl(cache_key)
+             cache_data = self._fetch_redis_data(cache_key)
          except Exception as e:
-             # redis 连接失败, 则绕过 redis 直接从 mysql 获取数据
-             print('redis 连接失败, 绕过 redis 直接从 mysql 获取数据')
-             _df = self.get_from_mysql(_db_name=_db_name, _table_name=_table_name, start_date=start_date, end_date=end_date, _set_year=_set_year)
-             return _df
-         _df = pd.DataFrame()
-
-         if ttl_result < 60:
-             # 1. redis 没有该数据时
-             print(f'数据不存在或过期')
-             thread = threading.Thread(target=self.set_redis, args=(my_key, _db_name, _table_name, _set_year, start_date, end_date, _df))
-             thread.start()
-             # _df = self.set_redis(my_key=my_key, _db_name=_db_name, _table_name=_table_name, _set_year=_set_year, start_date=start_date, end_date=end_date)
-             _df = self.get_from_mysql(_db_name=_db_name, _table_name=_table_name, start_date=start_date, end_date=end_date, _set_year=_set_year)
-             return _df
-         # 2. redis 有数据时
-         json_string = self.redis_engin.get(my_key)
-         data_dict = json.loads(json_string.decode('utf-8'))
-         _df = pd.DataFrame(data_dict)
-
-         if '日期' in _df.columns.tolist():
-             _df['日期'] = pd.to_datetime(_df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
-             min_date = _df['日期'].min()
-             max_date = _df['日期'].max()
-             # Bug: 如果外部请求日期小于 table 最小日期,每次都要从 mysql 获取数据,即使 redis 缓存了数据
-             if start_date < min_date: # 外部请求日期小于 redis 数据最小日期
-                 # 3. redis 有数据但数据不完整时
-                 print(f'{start_date} -- {min_date} 数据日期不对劲需要更新')
-                 thread = threading.Thread(target=self.set_redis, args=(my_key, _db_name, _table_name, _set_year, start_date, end_date, _df))
-                 thread.start()
-                 # _df = self.set_redis(my_key=my_key, _db_name=_db_name, _table_name=_table_name, _set_year=_set_year, start_date=start_date, end_date=end_date)
-                 _df = self.get_from_mysql(_db_name=_db_name, _table_name=_table_name, start_date=start_date, end_date=end_date, _set_year=_set_year)
-                 return _df
-             _df = _df[(_df['日期'] >= start_date) & (_df['日期'] <= end_date)]
-
-         return _df
-
-     def set_redis(self, my_key, _db_name, _table_name, _set_year, start_date, end_date, before_df):
+             logger.info(f"Redis 连接异常: {e},直接访问 MySQL")
+             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+         # 缓存失效处理逻辑
+         if ttl < 60 or cache_data.empty:
+             self._trigger_async_cache_update(
+                 cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+             )
+             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+         # 处理有效缓存数据
+         filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
+         if not filtered_df.empty:
+             return filtered_df
+
+         # 缓存数据不满足查询范围要求
+         self._trigger_async_cache_update(
+             cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+         )
+         return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+     def set_redis(
+         self,
+         cache_key: str,
+         db_name: str,
+         table_name: str,
+         set_year: bool,
+         start_date,
+         end_date,
+         existing_data: pd.DataFrame
+     ) -> pd.DataFrame:
          """
-         mysql 读取数据并存储 redis
-         由于这个函数是异步执行的,从页面段首次加载数据时,可能返回空,等待异步执行结束后会正常返回数据
+         异步更新 Redis 缓存,合并新旧数据
          """
-         _df = self.get_from_mysql(
-             _db_name=_db_name,
-             _table_name=_table_name,
-             start_date=start_date,
-             end_date=end_date,
-             _set_year=_set_year
+         try:
+             # 从 MySQL 获取新数据
+             new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+             if new_data.empty:
+                 return pd.DataFrame()
+
+             # 合并历史数据
+             combined_data = self._merge_data(new_data, existing_data)
+
+             # 序列化并存储到 Redis
+             serialized_data = self._serialize_data(combined_data)
+             self.redis_engine.set(cache_key, serialized_data)
+             self.redis_engine.expire(cache_key, self.cache_ttl)
+
+             logger.info(f"缓存更新 {cache_key} | 数据量: {len(combined_data)}")
+             return combined_data
+
+         except Exception as e:
+             logger.info(f"缓存更新失败: {cache_key} - {str(e)}")
+             return pd.DataFrame()
+
+     # Helper Methods ------------------------------------------------
+
+     def _fetch_table_data(
+         self,
+         db_name: str,
+         table_name: str,
+         start_date,
+         end_date
+     ) -> pd.DataFrame:
+         """封装 MySQL 数据获取逻辑"""
+         try:
+             return self.download.data_to_df(
+                 db_name=db_name,
+                 table_name=table_name,
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection={}
+             )
+         except Exception as e:
+             logger.info(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
+             return pd.DataFrame()
+
+     def _fetch_redis_data(self, cache_key: str) -> pd.DataFrame:
+         """从 Redis 获取并解析数据(自动转换日期列)"""
+         try:
+             data = self.redis_engine.get(cache_key)
+             if not data:
+                 return pd.DataFrame()
+             # 反序列化数据
+             df = pd.DataFrame(json.loads(data.decode("utf-8")))
+             return self._convert_date_columns(df)
+         except Exception as e:
+             logger.info(f"Redis 数据解析失败 {cache_key}: {e}")
+             return pd.DataFrame()
+
+     def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+         """统一处理日期列转换"""
+         if "日期" in df.columns:
+             df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
+         return df
+
+     def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
+         """生成标准化的缓存键"""
+         return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
+
+     def _filter_by_date_range(
+         self,
+         df: pd.DataFrame,
+         start_dt: datetime.datetime,
+         end_dt: datetime.datetime
+     ) -> pd.DataFrame:
+         """按日期范围筛选数据"""
+         if "日期" not in df.columns:
+             return df
+         date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
+         return df[date_mask].copy()
+
+     def _trigger_async_cache_update(
+         self,
+         cache_key: str,
+         db_name: str,
+         table_name: str,
+         set_year: bool,
+         start_date: str,
+         end_date: str,
+         existing_data: pd.DataFrame
+     ):
+         """启动异步缓存更新线程"""
+         thread = threading.Thread(
+             target=self.set_redis,
+             args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
+             daemon=True
          )
-         if '日期' in _df.columns.tolist():
-             _min_date = _df['日期'].min()
-             _max_date = _df['日期'].max()
-             if '日期' in before_df.columns.tolist():
-                 # 移除 redis 指定范围的数据,再合并新数据
-                 before_df1 = before_df[(before_df['日期'] < _min_date)]
-                 before_df2 = before_df[(before_df['日期'] > _max_date)]
-                 _df = pd.concat([_df, before_df1, before_df2], ignore_index=True, axis=0)
-         # if '日期' in _df.columns.tolist():
-         # _df['日期'] = _df['日期'].astype('str')
-         for col in _df.columns.tolist():
-             # 存入 redis ,需要先格式化为 str,避免日期变整数形式
-             if _df[col].dtype == 'datetime64[ns]':
-                 _df[col] = _df[col].astype('str')
-         jsondata = _df.to_json(orient='records', force_ascii=False)
-         self.redis_engin.set(my_key, jsondata)
-         self.redis_engin.expire(my_key, self.minute * 60) # 设置缓存过期时间: 分钟
-         if '日期' in _df.columns.tolist():
-             _df['日期'] = pd.to_datetime(_df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
-         now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-         print(f'{now}: 刷新 redis -> {_db_name}:{_table_name}')
-         return _df
+         thread.start()
+
+     def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
+         """合并新旧数据集"""
+         if existing_data.empty or "日期" not in existing_data.columns:
+             return new_data
+
+         new_min = new_data["日期"].min()
+         new_max = new_data["日期"].max()
+         valid_historical = existing_data[
+             (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
+         ]
+         return pd.concat([new_data, valid_historical], ignore_index=True).drop_duplicates(subset=["日期"])
+
+     def _serialize_data(self, df: pd.DataFrame) -> str:
+         """序列化 DataFrame 并处理日期类型"""
+         temp_df = df.copy()
+         date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
+         for col in date_cols:
+             temp_df[col] = temp_df[col].dt.strftime("%Y-%m-%d")
+         return temp_df.to_json(orient="records", force_ascii=False)


  if __name__ == '__main__':
-     # ****************************************************
-     # 这一部分在外部定义,只需要定义一次,开始
-     redis_config = {
-         'host': '127.0.0.1',
-         'port': 6379, # 默认Redis端口
-         'db': 0, # 默认Redis数据库索引
-         # 'username': 'default',
-         'password': redis_password,
-     }
-     # redis 实例化
-     r = redis.Redis(**redis_config)
-     # mysql 实例化
-     d = s_query.QueryDatas(username=username, password=password, host=host, port=port)
-     # 将两个库的实例化对象传给 RedisData 类,并实例化数据处理引擎
-     m = RedisData(redis_engin=r, download=d)
-     # ****************************************************
-
-     # 以下为动态获取数据库数据
-     db_name = '聚合数据'
-     table_name = '多店推广场景_按日聚合'
-     set_year = False
-     df = m.get_from_redis(
-         _db_name=db_name,
-         _table_name=table_name,
-         _set_year=set_year,
-         start_date='2025-01-01',
-         end_date='2025-01-31'
-     )
-     print(df)
+     # # ****************************************************
+     # # 这一部分在外部定义,只需要定义一次,开始
+     # redis_config = {
+     # 'host': '127.0.0.1',
+     # 'port': 6379, # 默认Redis端口
+     # 'db': 0, # 默认Redis数据库索引
+     # # 'username': 'default',
+     # 'password': redis_password,
+     # }
+     # # redis 实例化
+     # r = redis.Redis(**redis_config)
+     # # mysql 实例化
+     # d = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+     # # 将两个库的实例化对象传给 RedisData 类,并实例化数据处理引擎
+     # m = RedisData(redis_engin=r, download=d)
+     # # ****************************************************
+     #
+     # # 以下为动态获取数据库数据
+     # db_name = '聚合数据'
+     # table_name = '多店推广场景_按日聚合'
+     # set_year = False
+     # df = m.get_from_redis(
+     # db_name=db_name,
+     # table_name=table_name,
+     # set_year=set_year,
+     # start_date='2025-01-01',
+     # end_date='2025-01-31'
+     # )
+     # logger.info(df)
+     #
+
+     logger.info(socket.gethostname())
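
Usage note (editor's sketch, not part of the package contents): in 3.6.4 the RedisData constructor takes redis_engine (renamed from redis_engin) plus a required cache_ttl in minutes, and get_from_redis now uses the db_name / table_name / set_year keywords. A minimal example based on the commented-out __main__ block above, assuming the same mdbq.config.myconfig credential layout used in the module header:

# Editor's sketch (assumed environment; credentials come from myconfig as in the module header).
import redis
from mdbq.config import myconfig
from mdbq.mysql import s_query
from mdbq.redis.getredis import RedisData

conf = myconfig.main()
conf_data = conf['Windows']['company']['mysql']['local']
username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
redis_password = conf['Windows']['company']['redis']['local']['password']

r = redis.Redis(host='127.0.0.1', port=6379, db=0, password=redis_password)  # local Redis, as in the example
d = s_query.QueryDatas(username=username, password=password, host=host, port=port)

m = RedisData(redis_engine=r, download=d, cache_ttl=60)  # cache_ttl is now required, in minutes
df = m.get_from_redis(
    db_name='聚合数据',
    table_name='多店推广场景_按日聚合',
    set_year=False,
    start_date='2025-01-01',
    end_date='2025-01-31',
)
print(df)

If the Redis connection fails or the cache is cold, get_from_redis falls back to a direct MySQL read, so the call should still return data while the cache refresh runs in a background thread.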
mdbq/redis/getredis_bak20250131.py ADDED
@@ -0,0 +1,265 @@
+ # -*- coding: UTF-8 –*-
+ import redis
+ import socket
+ from mdbq.mysql import s_query
+ from mdbq.config import myconfig
+ import pandas as pd
+ import json
+ import datetime
+ import threading
+
+
+ if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
+     conf = myconfig.main()
+     conf_data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password']
+ # elif socket.gethostname() == 'MacBook-Pro.local':
+ #     conf = myconfig.main()
+ #     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+ #     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+ #     redis_password = conf['Windows']['company']['redis']['local']['password']
+ else:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password'] # redis 使用本地数据,全部机子相同
+
+
+
+ class RedisData(object):
+     def __init__(self, redis_engin, download):
+         self.redis_engin = redis_engin # redis 数据处理引擎
+         self.download = download # mysql 数据处理引擎
+         self.minute = 60 # 缓存过期时间: 分钟
+
+     def get_from_mysql(self, _db_name, _table_name, _set_year, start_date, end_date):
+         """
+         _set_year: _table_name 中是否含有年份
+         """
+         if _set_year:
+             __res = []
+             for year in range(2024, datetime.datetime.today().year + 1):
+                 _df = self.download.data_to_df(
+                     db_name=_db_name,
+                     table_name=f'{_table_name}_{year}',
+                     start_date=start_date,
+                     end_date=end_date,
+                     projection={},
+                 )
+                 __res.append(_df)
+             _df = pd.concat(__res, ignore_index=True)
+         else:
+             _df = self.download.data_to_df(
+                 db_name=_db_name,
+                 table_name=_table_name,
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection={},
+             )
+         if len(_df) == 0:
+             print(f'{_db_name} - {_table_name}: mysql读取的数据不能为空')
+             return pd.DataFrame()
+         if '日期' in _df.columns.tolist():
+             _df['日期'] = pd.to_datetime(_df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
+         return _df
+
+     def get_from_redis(self, _db_name, _table_name, _set_year, start_date, end_date):
+         """
+         _set_year: _table_name 中是否含有年份
+         _col_list: 如果不传就取 table 的所有列
+         对于日期: 最终传出的是日期格式,但如果存入 redis ,需要先格式化为 str,避免日期变整数形式
+         """
+         start_date = pd.to_datetime(start_date)
+         end_date = pd.to_datetime(end_date)
+         if _set_year:
+             my_key = f'{_db_name}:{_table_name}_haveyear'
+         else:
+             my_key = f'{_db_name}:{_table_name}'
+         # ttl 对于不存在的键,它返回 -2;而对于没有设置过期时间的键,它返回 -1
+         try:
+             ttl_result = self.redis_engin.ttl(my_key)
+         except Exception as e:
+             # redis 连接失败, 则绕过 redis 直接从 mysql 获取数据
+             print('redis 连接失败, 绕过 redis 直接从 mysql 获取数据')
+             _df = self.get_from_mysql(_db_name=_db_name, _table_name=_table_name, start_date=start_date, end_date=end_date, _set_year=_set_year)
+             return _df
+         _df = pd.DataFrame()
+
+         if ttl_result < 60:
+             # 1. redis 没有该数据时
+             print(f'数据不存在或过期')
+             thread = threading.Thread(target=self.set_redis, args=(my_key, _db_name, _table_name, _set_year, start_date, end_date, _df))
+             thread.start()
+             # _df = self.set_redis(my_key=my_key, _db_name=_db_name, _table_name=_table_name, _set_year=_set_year, start_date=start_date, end_date=end_date)
+             _df = self.get_from_mysql(_db_name=_db_name, _table_name=_table_name, start_date=start_date, end_date=end_date, _set_year=_set_year)
+             return _df
+         # 2. redis 有数据时
+         json_string = self.redis_engin.get(my_key)
+         data_dict = json.loads(json_string.decode('utf-8'))
+         _df = pd.DataFrame(data_dict)
+
+         if '日期' in _df.columns.tolist():
+             _df['日期'] = pd.to_datetime(_df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
+             min_date = _df['日期'].min()
+             max_date = _df['日期'].max()
+             # Bug: 如果外部请求日期小于 table 最小日期,每次都要从 mysql 获取数据,即使 redis 缓存了数据
+             if start_date < min_date: # 外部请求日期小于 redis 数据最小日期
+                 # 3. redis 有数据但数据不完整时
+                 print(f'{start_date} -- {min_date} 数据日期需要更新')
+                 thread = threading.Thread(target=self.set_redis, args=(my_key, _db_name, _table_name, _set_year, start_date, end_date, _df))
+                 thread.start()
+                 # _df = self.set_redis(my_key=my_key, _db_name=_db_name, _table_name=_table_name, _set_year=_set_year, start_date=start_date, end_date=end_date)
+                 _df = self.get_from_mysql(_db_name=_db_name, _table_name=_table_name, start_date=start_date, end_date=end_date, _set_year=_set_year)
+                 return _df
+             _df = _df[(_df['日期'] >= start_date) & (_df['日期'] <= end_date)]
+
+         return _df
+
+     def set_redis(self, my_key, _db_name, _table_name, _set_year, start_date, end_date, before_df):
+         """
+         从MySQL读取数据并存储到Redis(异步执行)
+
+         Args:
+             my_key: Redis存储键名
+             _db_name: 数据库名称
+             _table_name: 数据表名称
+             _set_year: 数据集年份
+             start_date: 查询开始日期
+             end_date: 查询结束日期
+             before_df: 合并用的历史数据
+
+         Returns:
+             pd.DataFrame: 处理后的数据集(含历史数据合并)
+         """
+         # 异常处理容器
+         datetime_cols = []
+
+         try:
+             # 从MySQL获取数据
+             _df = self.get_from_mysql(
+                 _db_name=_db_name,
+                 _table_name=_table_name,
+                 start_date=start_date,
+                 end_date=end_date,
+                 _set_year=_set_year
+             )
+
+             # 日期列处理(当新旧数据都存在日期列时)
+             if '日期' in _df.columns and '日期' in before_df.columns:
+                 # 获取当前数据时间范围
+                 _min_date, _max_date = _df['日期'].min(), _df['日期'].max()
+
+                 # 筛选需要保留的历史数据
+                 mask = (before_df['日期'] < _min_date) | (before_df['日期'] > _max_date)
+                 valid_history = before_df[mask]
+
+                 # 合并数据
+                 _df = pd.concat([_df, valid_history], ignore_index=True, axis=0)
+                 _df.drop_duplicates(subset='日期', keep='first', inplace=True) # 可选去重
+
+             # 预处理时间类型转换
+             datetime_cols = _df.select_dtypes(include=['datetime64[ns]']).columns.tolist()
+             if datetime_cols:
+                 _df[datetime_cols] = _df[datetime_cols].astype(str)
+
+             # 空数据检查
+             if _df.empty:
+                 print(f'Warning: {_table_name} 空数据集,跳过Redis存储')
+                 return pd.DataFrame()
+
+             # Redis存储操作
+             self.redis_engin.set(my_key, _df.to_json(orient='records', force_ascii=False))
+             self.redis_engin.expire(my_key, self.minute * 60)
+
+             # 恢复时间类型(返回用)
+             if datetime_cols:
+                 _df[datetime_cols] = _df[datetime_cols].apply(pd.to_datetime, errors='coerce')
+
+             # 记录操作日志
+             print(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}'
+                   f' | 刷新Redis {_db_name}:{_table_name}'
+                   f' | 数据量:{len(_df)}行')
+
+         except Exception as e:
+             print(f'Error: {_table_name} 数据处理失败 - {str(e)}')
+             _df = pd.DataFrame()
+
+         finally:
+             # 确保返回前恢复时间类型
+             if datetime_cols and not _df.empty:
+                 _df[datetime_cols] = _df[datetime_cols].apply(pd.to_datetime, errors='ignore')
+
+         return _df
+
+     def set_redis_bak(self, my_key, _db_name, _table_name, _set_year, start_date, end_date, before_df):
+         """
+         从 mysql 读取数据并存储 redis
+         由于这个函数是异步执行的,从页面段首次加载数据时,可能返回空,等待异步执行结束后会正常返回数据
+         """
+         _df = self.get_from_mysql(
+             _db_name=_db_name,
+             _table_name=_table_name,
+             start_date=start_date,
+             end_date=end_date,
+             _set_year=_set_year
+         )
+         if '日期' in _df.columns.tolist():
+             _min_date = _df['日期'].min()
+             _max_date = _df['日期'].max()
+             if '日期' in before_df.columns.tolist():
+                 # 移除 redis 指定范围的数据,再合并新数据
+                 before_df1 = before_df[(before_df['日期'] < _min_date)]
+                 before_df2 = before_df[(before_df['日期'] > _max_date)]
+                 _df = pd.concat([_df, before_df1, before_df2], ignore_index=True, axis=0)
+         # if '日期' in _df.columns.tolist():
+         # _df['日期'] = _df['日期'].astype('str')
+         for col in _df.columns.tolist():
+             # 存入 redis ,需要先格式化为 str,避免日期变整数形式
+             if _df[col].dtype == 'datetime64[ns]':
+                 _df[col] = _df[col].astype('str')
+         if len(_df) == 0:
+             print(f'{_table_name}: 写入 redis 的数据不能为空')
+             return pd.DataFrame()
+         jsondata = _df.to_json(orient='records', force_ascii=False)
+         self.redis_engin.set(my_key, jsondata)
+         self.redis_engin.expire(my_key, self.minute * 60) # 设置缓存过期时间: 分钟
+         if '日期' in _df.columns.tolist():
+             _df['日期'] = pd.to_datetime(_df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
+         now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+         print(f'{now}: 刷新 redis -> {_db_name}:{_table_name}')
+         return _df
+
+ if __name__ == '__main__':
+     # # ****************************************************
+     # # 这一部分在外部定义,只需要定义一次,开始
+     # redis_config = {
+     # 'host': '127.0.0.1',
+     # 'port': 6379, # 默认Redis端口
+     # 'db': 0, # 默认Redis数据库索引
+     # # 'username': 'default',
+     # 'password': redis_password,
+     # }
+     # # redis 实例化
+     # r = redis.Redis(**redis_config)
+     # # mysql 实例化
+     # d = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+     # # 将两个库的实例化对象传给 RedisData 类,并实例化数据处理引擎
+     # m = RedisData(redis_engin=r, download=d)
+     # # ****************************************************
+     #
+     # # 以下为动态获取数据库数据
+     # db_name = '聚合数据'
+     # table_name = '多店推广场景_按日聚合'
+     # set_year = False
+     # df = m.get_from_redis(
+     # _db_name=db_name,
+     # _table_name=table_name,
+     # _set_year=set_year,
+     # start_date='2025-01-01',
+     # end_date='2025-01-31'
+     # )
+     # print(df)
+     #
+
+     print(socket.gethostname())
mdbq/redis/getredis_deepseek.py ADDED
@@ -0,0 +1,235 @@
+ # -*- coding: UTF-8 –*-
+ import redis
+ import socket
+ from mdbq.mysql import s_query
+ from mdbq.config import myconfig
+ import pandas as pd
+ import json
+ import datetime
+ import threading
+
+
+ if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
+     conf = myconfig.main()
+     conf_data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password']
+ # elif socket.gethostname() == 'MacBook-Pro.local':
+ #     conf = myconfig.main()
+ #     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+ #     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+ #     redis_password = conf['Windows']['company']['redis']['local']['password']
+ else:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     redis_password = conf['Windows']['company']['redis']['local']['password'] # redis 使用本地数据,全部机子相同
+
+
+ class RedisData:
+     def __init__(self, redis_engine, download, cache_ttl: int = 60):
+         """
+         初始化Redis数据处理对象
+
+         :param redis_engine: Redis连接引擎
+         :param download: 数据库下载处理器
+         :param cache_ttl: 缓存过期时间(分钟)
+         """
+         self.redis_engine = redis_engine
+         self.download = download
+         self.cache_ttl = cache_ttl * 60 # 转换为秒
+         self.lock = threading.Lock() # 线程锁
+
+     def _handle_datetime_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+         """统一处理日期列转换"""
+         if '日期' in df.columns:
+             df['日期'] = pd.to_datetime(df['日期'], errors='coerce', format='%Y-%m-%d')
+         return df
+
+     def get_from_mysql(self, db_name: str, table_name: str,
+                        start_date, end_date,
+                        set_year: bool) -> pd.DataFrame:
+         """
+         从MySQL获取数据
+
+         :param set_year: 是否按年份分表
+         """
+         try:
+             if set_year:
+                 current_year = datetime.datetime.now().year
+                 dfs = []
+                 # 动态获取需要查询的年份范围
+                 min_year = min(2024, pd.to_datetime(start_date).year) # 根据实际需求调整
+                 for year in range(min_year, current_year + 1):
+                     table = f"{table_name}_{year}"
+                     df = self.download.data_to_df(
+                         db_name=db_name,
+                         table_name=table,
+                         start_date=start_date,
+                         end_date=end_date,
+                         projection={}
+                     )
+                     if not df.empty:
+                         dfs.append(df)
+                 _df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+             else:
+                 _df = self.download.data_to_df(
+                     db_name=db_name,
+                     table_name=table_name,
+                     start_date=start_date,
+                     end_date=end_date,
+                     projection={}
+                 )
+
+             if _df.empty:
+                 print(f"空数据 - {db_name}.{table_name}")
+                 return pd.DataFrame()
+
+             return self._handle_datetime_columns(_df)
+
+         except Exception as e:
+             print(f"MySQL查询失败: {str(e)}")
+             return pd.DataFrame()
+
+     def get_from_redis(self, db_name: str, table_name: str,
+                        start_date, end_date,
+                        set_year: bool) -> pd.DataFrame:
+         """
+         从Redis获取数据(带自动缓存更新)
+         """
+         start_dt = pd.to_datetime(start_date)
+         end_dt = pd.to_datetime(end_date)
+
+         # 生成统一缓存键
+         cache_key = f"{db_name}:{table_name}{'_year' if set_year else ''}"
+
+         try:
+             # 检查缓存状态
+             with self.lock: # 保证线程安全
+                 ttl = self.redis_engine.ttl(cache_key)
+
+                 if ttl < 300: # 剩余时间小于5分钟时触发更新
+                     print(f"异步缓存更新: {cache_key}")
+                     threading.Thread(
+                         target=self._update_cache,
+                         args=(cache_key, db_name, table_name,
+                               start_date, end_date, set_year),
+                         daemon=True
+                     ).start()
+
+             # 获取缓存数据
+             cached_data = self.redis_engine.get(cache_key)
+             if not cached_data:
+                 return self._fallback_to_mysql(db_name, table_name,
+                                                start_date, end_date, set_year)
+             json_str = cached_data.decode('utf-8')
+             _df = pd.read_json(json_str, orient='records')
+             _df = self._handle_datetime_columns(_df)
+
+             # 数据范围校验
+             if '日期' in _df.columns:
+                 cache_min = _df['日期'].min()
+                 cache_max = _df['日期'].max()
+
+                 # 请求范围超出缓存范围时需要更新
+                 if start_dt < cache_min or end_dt > cache_max:
+                     print(f"请求范围超出缓存 {start_dt.strftime('%Y-%m-%d ')} - {end_dt.strftime('%Y-%m-%d ')}")
+                     self._update_cache(cache_key, db_name, table_name,
+                                        start_date, end_date, set_year, _df)
+                     return self._fallback_to_mysql(db_name, table_name,
+                                                    start_date, end_date, set_year)
+
+                 return _df[(start_dt <= _df['日期']) & (_df['日期'] <= end_dt)]
+             return _df
+
+         except Exception as e:
+             print(f"Redis操作失败: {str(e)}")
+             return self._fallback_to_mysql(db_name, table_name,
+                                            start_date, end_date, set_year)
+
+     def _update_cache(self, cache_key: str, db_name: str, table_name: str,
+                       start_date: str, end_date: str, set_year: bool,
+                       existing_df: pd.DataFrame = None) -> None:
+         """缓存更新核心逻辑"""
+         try:
+             # 获取最新数据
+             new_data = self.get_from_mysql(
+                 db_name=db_name,
+                 table_name=table_name,
+                 start_date=start_date,
+                 end_date=end_date,
+                 set_year=set_year
+             )
+
+             # 合并历史数据
+             if existing_df is not None and not new_data.empty:
+                 combined = pd.concat([existing_df, new_data], ignore_index=True)
+                 combined = combined.drop_duplicates(subset='日期', keep='last')
+             else:
+                 combined = new_data
+
+             if not combined.empty:
+                 # 转换日期类型为字符串
+                 temp_df = combined.copy()
+                 datetime_cols = temp_df.select_dtypes(include=['datetime64[ns]']).columns
+                 temp_df[datetime_cols] = temp_df[datetime_cols].astype(str)
+
+                 # 存储到Redis
+                 with self.lock:
+                     self.redis_engine.set(
+                         cache_key,
+                         temp_df.to_json(orient='records', force_ascii=False),
+                         ex=self.cache_ttl
+                     )
+                 print(f"缓存更新成功: {cache_key} | 记录数: {len(combined)}")
+
+         except Exception as e:
+             print(f"缓存更新失败: {str(e)}")
+
+     def _fallback_to_mysql(self, db_name: str, table_name: str,
+                            start_date: str, end_date: str,
+                            set_year: bool) -> pd.DataFrame:
+         """降级到直接MySQL查询"""
+         print(f"降级到MySQL查询: {db_name}.{table_name}")
+         return self.get_from_mysql(
+             db_name=db_name,
+             table_name=table_name,
+             start_date=start_date,
+             end_date=end_date,
+             set_year=set_year
+         )
+
+
+ if __name__ == '__main__':
+     # # ****************************************************
+     # # 这一部分在外部定义,只需要定义一次,开始
+     # redis_config = {
+     # 'host': '127.0.0.1',
+     # 'port': 6379, # 默认Redis端口
+     # 'db': 0, # 默认Redis数据库索引
+     # # 'username': 'default',
+     # 'password': redis_password,
+     # }
+     # # redis 实例化
+     # r = redis.Redis(**redis_config)
+     # # mysql 实例化
+     # d = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+     # # 将两个库的实例化对象传给 RedisData 类,并实例化数据处理引擎
+     # m = RedisData(redis_engin=r, download=d)
+     # # ****************************************************
+     #
+     # # 以下为动态获取数据库数据
+     # db_name = '聚合数据'
+     # table_name = '多店推广场景_按日聚合'
+     # set_year = False
+     # df = m.get_from_redis(
+     # _db_name=db_name,
+     # _table_name=table_name,
+     # _set_year=set_year,
+     # start_date='2025-01-01',
+     # end_date='2025-01-31'
+     # )
+     # print(df)
+     #
+
+     print(socket.gethostname())
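
Usage note (editor's sketch, not part of the package contents): the getredis_deepseek variant keeps the RedisData class name but makes cache_ttl optional (defaulting to 60 minutes), guards Redis access with a thread lock, and declares the query parameters in the order start_date / end_date / set_year. The import path below is assumed from the file location recorded in RECORD; r and d are reused from the sketch after the getredis.py diff above:

# Editor's sketch for the deepseek variant (assumed import path; keyword arguments avoid the positional-order difference).
from mdbq.redis.getredis_deepseek import RedisData as DeepseekRedisData

m2 = DeepseekRedisData(redis_engine=r, download=d)  # cache_ttl optional here, defaults to 60 minutes
df2 = m2.get_from_redis(
    db_name='聚合数据',
    table_name='多店推广场景_按日聚合',
    start_date='2025-01-01',
    end_date='2025-01-31',
    set_year=False,
)
print(df2)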
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 3.6.2
+ Version: 3.6.4
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -33,10 +33,12 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
  mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
  mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
- mdbq/redis/getredis.py,sha256=zpkdy624Drj2ujvOcldKS4PSN8bfKKmIIRkB0KqL7x0,8657
+ mdbq/redis/getredis.py,sha256=5aLu2_S5hvv48uVQSjp9wxHBE8WOiAkpSpugXcy9FXM,11184
+ mdbq/redis/getredis_bak20250131.py,sha256=DQazRyKVnaDziP9JEIofAJF8dw_PKyLEgwEznlTnGDw,12284
+ mdbq/redis/getredis_deepseek.py,sha256=bQ6VfiTYkQ5cYK6MYJPKgwbdrwsOLBLrV-ObblKaurA,9653
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
  mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
- mdbq-3.6.2.dist-info/METADATA,sha256=EtY3g6bo6ZkYkc_47W3VJO6toGHWZXIFVaBgXZtjX_U,243
- mdbq-3.6.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- mdbq-3.6.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-3.6.2.dist-info/RECORD,,
+ mdbq-3.6.4.dist-info/METADATA,sha256=PSbZGIuI23xbX4wJsaqFJltQAJ4ugR_gaD1SPTnljMQ,243
+ mdbq-3.6.4.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+ mdbq-3.6.4.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-3.6.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.44.0)
+ Generator: setuptools (70.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
