mdbq 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {mdbq-0.0.6 → mdbq-0.0.8}/PKG-INFO +1 -1
  2. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/aggregation/aggregation.py +11 -12
  3. mdbq-0.0.8/mdbq/aggregation/query_data.py +321 -0
  4. mdbq-0.0.8/mdbq/mysql/s_query.py +120 -0
  5. mdbq-0.0.8/mdbq/mysql/year_month_day.py +38 -0
  6. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq.egg-info/PKG-INFO +1 -1
  7. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq.egg-info/SOURCES.txt +1 -0
  8. {mdbq-0.0.6 → mdbq-0.0.8}/setup.py +1 -1
  9. mdbq-0.0.6/mdbq/aggregation/query_data.py +0 -307
  10. mdbq-0.0.6/mdbq/mysql/s_query.py +0 -165
  11. {mdbq-0.0.6 → mdbq-0.0.8}/README.txt +0 -0
  12. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/__init__.py +0 -0
  13. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/__version__.py +0 -0
  14. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/aggregation/__init__.py +0 -0
  15. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/bdup/__init__.py +0 -0
  16. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/bdup/bdup.py +0 -0
  17. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/clean/__init__.py +0 -0
  18. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/clean/data_clean.py +0 -0
  19. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/company/__init__.py +0 -0
  20. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/company/copysh.py +0 -0
  21. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/config/__init__.py +0 -0
  22. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/config/get_myconf.py +0 -0
  23. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/config/update_conf.py +0 -0
  24. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/log/__init__.py +0 -0
  25. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/log/mylogger.py +0 -0
  26. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/mongo/__init__.py +0 -0
  27. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/mongo/mongo.py +0 -0
  28. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/mysql/__init__.py +0 -0
  29. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/mysql/mysql.py +0 -0
  30. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/other/__init__.py +0 -0
  31. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/other/porxy.py +0 -0
  32. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/other/pov_city.py +0 -0
  33. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/other/ua_sj.py +0 -0
  34. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/pbix/__init__.py +0 -0
  35. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/pbix/pbix_refresh.py +0 -0
  36. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/pbix/refresh_all.py +0 -0
  37. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq/spider/__init__.py +0 -0
  38. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq.egg-info/dependency_links.txt +0 -0
  39. {mdbq-0.0.6 → mdbq-0.0.8}/mdbq.egg-info/top_level.txt +0 -0
  40. {mdbq-0.0.6 → mdbq-0.0.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -118,11 +118,13 @@ class DataTypes:
118
118
  """
119
119
  # dtypes = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
120
120
  df.replace([np.inf, -np.inf], 0, inplace=True) # 清理一些非法值
121
-
122
- cols = df.columns.tolist()
123
121
  df.replace(to_replace=['\\N', '-', '--', '', 'nan'], value=0, regex=False, inplace=True) # 替换掉特殊字符
124
122
  df.replace(to_replace=[','], value='', regex=True, inplace=True)
123
+ df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
124
+ df.replace(to_replace=['"'], value='', regex=True, inplace=True)
125
+ cols = df.columns.tolist()
125
126
  for col in cols:
127
+ # df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
126
128
  # 百分比在某些数据库中不兼容, 转换百分比为小数
127
129
  df[col] = df[col].apply(lambda x: float(float((str(x).rstrip("%"))) / 100) if str(x).endswith('%') and '~' not in str(x) else x)
128
130
  # 尝试转换合适的数据类型
@@ -976,9 +978,9 @@ def update_dtypte():
976
978
 
977
979
  def upload():
978
980
  """ 上传一个文件夹到数据库 """
979
- path = '/Users/xigua/数据中心/原始文件2/生意参谋/客户_客户概况_画像'
980
- db_name = '生意参谋数据2'
981
- collection_name = '客户_客户概况_画像'
981
+ path = '/Users/xigua/数据中心/原始文件2/生意经/宝贝指标'
982
+ db_name = '生意经2'
983
+ collection_name = '宝贝指标'
982
984
 
983
985
  username, password, host, port = get_myconf.select_config_values(
984
986
  target_service='home_lx',
@@ -1018,10 +1020,7 @@ def upload():
1018
1020
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
1019
1021
  if len(df) == 0:
1020
1022
  continue
1021
- for col in df.columns.tolist():
1022
- df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
1023
- # df.replace(to_replace=['--'], value='', regex=False, inplace=True)
1024
- df = dt.convert_df_cols(df=df)
1023
+ df = dt.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
1025
1024
  try:
1026
1025
  df = df.astype(dtypes)
1027
1026
  except Exception as e:
@@ -1044,8 +1043,8 @@ def upload():
1044
1043
 
1045
1044
 
1046
1045
  if __name__ == '__main__':
1047
- username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
1048
- print(username, password, host, port)
1046
+ # username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
1047
+ # print(username, password, host, port)
1049
1048
 
1050
1049
  # main()
1051
- # upload()
1050
+ upload()
@@ -0,0 +1,321 @@
1
+ # -*- coding: UTF-8 -*-
2
+ from mdbq.mongo import mongo
3
+ from mdbq.mysql import s_query
4
+ from mdbq.config import get_myconf
5
+ import datetime
6
+ from dateutil.relativedelta import relativedelta
7
+ import pandas as pd
8
+ import numpy as np
9
+ import platform
10
+ import getpass
11
+ import json
12
+ import os
13
+
14
+
15
class MongoDatasQuery:
    """
    Download report data from MongoDB as DataFrames.

    self.months: how many months back to download; 0 means the current month,
        1 means from the 1st of last month until now.
    self.download: the mongo.DownMongo client built from the credentials that
        get_myconf returns for ``target_service``.
    """
    def __init__(self, target_service):
        # target_service: which configured server the data is downloaded from
        self.months = 0
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service, database='mongodb')
        self.download = mongo.DownMongo(
            username=username, password=password, host=host, port=port, save_path=None)

    def tg_wxt(self):
        """ Return the promotion report (推广数据_宝贝主体报表) for the configured month window. """
        # The date window is handed to the client through its attributes
        self.download.start_date, self.download.end_date = self.months_data(num=self.months)
        projection = {
            '日期': 1,
            '场景名字': 1,
            '主体id': 1,
            '花费': 1,
            '展现量': 1,
            '点击量': 1,
            '总购物车数': 1,
            '总成交笔数': 1,
            '总成交金额': 1,
            '自然流量曝光量': 1,
            '直接成交笔数': 1,
            '直接成交金额': 1,
        }
        return self.download.data_to_df(
            db_name='天猫数据2',
            collection_name='推广数据_宝贝主体报表',
            projection=projection,
        )

    @staticmethod
    def days_data(days, end_date=None):
        """
        Return (start, end) timestamps covering the last ``days`` days.

        end_date: datetime, date or date string; defaults to now.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        # Normalising first lets callers pass strings or dates, not only datetimes
        end_date = pd.to_datetime(end_date)
        start_date = end_date - datetime.timedelta(days=days)
        return pd.to_datetime(start_date), end_date

    @staticmethod
    def months_data(num=0, end_date=None):
        """
        Return (start, end) timestamps covering the last ``num`` months.

        The start is always the 1st day of the month ``num`` months before
        end_date, so num=0 yields the current month only.
        end_date: datetime, date or date string; defaults to now.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        end_date = pd.to_datetime(end_date)
        # Count months since year 0 so that stepping back across a year boundary is plain arithmetic
        month_count = end_date.year * 12 + (end_date.month - 1) - int(num)
        year, month_index = divmod(month_count, 12)
        start_date = pd.Timestamp(year=year, month=month_index + 1, day=1)
        return start_date, end_date
67
+
68
+
69
class MysqlDatasQuery:
    """
    Download report data from MySQL as DataFrames.

    self.months: how many months back to download; 0 means the current month,
        1 means from the 1st of last month until now.
    self.download: the s_query.QueryDatas client built from the credentials
        that get_myconf returns for ``target_service``.
    """
    def __init__(self, target_service):
        # target_service: which configured server the data is downloaded from
        self.months = 0
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service, database='mysql')
        self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)

    def _fetch(self, db_name, tabel_name, columns):
        """ Query ``columns`` of one table for the configured month window. """
        start_date, end_date = self.months_data(num=self.months)
        return self.download.data_to_df(
            db_name=db_name,
            tabel_name=tabel_name,
            start_date=start_date,
            end_date=end_date,
            projection={column: 1 for column in columns},
        )

    def tg_wxt(self):
        """ Return the promotion report (推广数据_宝贝主体报表) for the configured month window. """
        return self._fetch(
            db_name='天猫数据2',
            tabel_name='推广数据_宝贝主体报表',
            columns=[
                '日期',
                '场景名字',
                '主体id',
                '花费',
                '展现量',
                '点击量',
                '总购物车数',
                '总成交笔数',
                '总成交金额',
                '自然流量曝光量',
                '直接成交笔数',
                '直接成交金额',
            ],
        )

    def syj(self):
        """ Return the item metrics report (宝贝指标) for the configured month window. """
        return self._fetch(
            db_name='生意经2',
            tabel_name='宝贝指标',
            columns=[
                '日期',
                '宝贝id',
                '商家编码',
                '行业类目',
                '销售额',
                '销售量',
                '订单数',
                '退货量',
                '退款额',
                '退货量_发货后_',
            ],
        )

    @staticmethod
    def months_data(num=0, end_date=None):
        """
        Return (start, end) timestamps covering the last ``num`` months.

        The start is always the 1st day of the month ``num`` months before
        end_date, so num=0 yields the current month only.
        end_date: datetime, date or date string; defaults to now.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        # Normalising first lets callers pass strings or dates, not only datetimes
        end_date = pd.to_datetime(end_date)
        # Count months since year 0 so that stepping back across a year boundary is plain arithmetic
        month_count = end_date.year * 12 + (end_date.month - 1) - int(num)
        year, month_index = divmod(month_count, 12)
        start_date = pd.Timestamp(year=year, month=month_index + 1, day=1)
        return start_date, end_date
137
+
138
+
139
class GroupBy:
    """ Aggregate downloaded report data and export it as csv / json / excel. """
    def __init__(self):
        # self.output: default export directory, chosen per operating system
        system = platform.system()
        if system == 'Darwin':
            self.output = os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
        elif system == 'Windows':
            self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
        else:
            self.output = os.path.join('数据中心/数据库导出')

    def groupby(self, df, tabel_name, is_maximize=True):
        """
        Aggregate ``df`` with the rules configured for ``tabel_name``.

        is_maximize: for the promotion report, keep the largest (True) or the
            smallest (False) conversion figures among duplicated rows.
        Returns a new DataFrame; the frame passed in is left untouched.
        An empty DataFrame is returned for tables without configured rules.
        """
        if '宝贝主体报表' in tabel_name:
            return self._group_promotion(df, is_maximize)
        if '宝贝指标' in tabel_name:
            return self._group_item_metrics(df)
        print(f'<{tabel_name}>: Groupby 类尚未配置,数据为空')
        return pd.DataFrame({})

    @staticmethod
    def _group_promotion(df, is_maximize):
        """ Collapse duplicated promotion rows that share date, scene, item, cost, impressions and clicks. """
        df = df.rename(columns={
            '场景名字': '营销场景',
            '主体id': '商品id',
            '总购物车数': '加购量',
            '总成交笔数': '成交笔数',
            '总成交金额': '成交金额'
        })
        # Fill gaps before casting: NaN cannot be converted to int
        df = df.fillna(0)
        df = df.astype({
            '花费': float,
            '展现量': int,
            '点击量': int,
            '加购量': int,
            '成交笔数': int,
            '成交金额': float,
            '自然流量曝光量': int,
            '直接成交笔数': int,
            '直接成交金额': float,
        }, errors='raise')
        pick = 'max' if is_maximize else 'min'
        df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
            **{
                '加购量': ('加购量', pick),
                '成交笔数': ('成交笔数', pick),
                '成交金额': ('成交金额', pick),
                '自然流量曝光量': ('自然流量曝光量', pick),
                # NOTE(review): the direct-deal columns take the maximum in both modes,
                # exactly as before - confirm this is intended for is_maximize=False
                '直接成交笔数': ('直接成交笔数', 'max'),
                '直接成交金额': ('直接成交金额', 'max'),
            }
        )
        df.insert(loc=1, column='推广渠道', value='万相台无界版')  # insert the channel column after the date
        return df

    @staticmethod
    def _price_band(unit_price):
        """ Map an average unit price to its price band label. """
        if unit_price >= 2000:
            return '2000+'
        if unit_price >= 1000:
            return '1000+'
        if unit_price >= 500:
            return '500+'
        if unit_price >= 300:
            return '300+'
        return '300以下'

    @staticmethod
    def _group_item_metrics(df):
        """ Collapse duplicated item metric rows and add average unit price and price band columns. """
        df = df.fillna(0)
        df = df[(df['销售额'] != 0) | (df['退款额'] != 0)]  # keep rows with sales or refunds only
        if df.empty:
            # A row-wise apply on an empty frame returns a DataFrame and cannot be
            # assigned to one column, so the derived columns are added directly
            return df.assign(**{
                '件均价': np.array([], dtype=float),
                '价格带': np.array([], dtype=object),
            })
        df = df.groupby(['日期', '宝贝id', '商家编码', '行业类目'], as_index=False).agg(
            **{
                '销售额': ('销售额', 'min'),
                '销售量': ('销售量', 'min'),
                '订单数': ('订单数', 'min'),
                '退货量': ('退货量', 'max'),
                '退款额': ('退款额', 'max'),
                '退货量_发货后_': ('退货量_发货后_', 'max'),
            }
        )
        # Guard against division by zero when nothing was sold
        df['件均价'] = df.apply(
            lambda row: row['销售额'] / row['销售量'] if row['销售量'] > 0 else 0, axis=1).round(0)
        df['价格带'] = df['件均价'].apply(GroupBy._price_band)
        return df

    def _export_dir(self, path):
        """ Return the export directory (self.output plus the optional sub folder), creating it if needed. """
        target = os.path.join(self.output, path) if path else self.output
        os.makedirs(target, exist_ok=True)
        return target

    @staticmethod
    def _should_sort(st_ascend, ascend):
        """ True when sort columns were given together with a usable direction. """
        if not st_ascend:
            return False
        # A plain False means "descending" and must not be mistaken for "no sorting requested"
        if isinstance(ascend, bool):
            return True
        return bool(ascend)

    def _sort_in_place(self, df, filename, st_ascend, ascend):
        """ Sort ``df`` in place when requested; bad sort arguments are reported, not raised. """
        if not self._should_sort(st_ascend, ascend):
            return
        try:
            df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
        except Exception:
            print(f'{filename}: sort_values排序参数错误!')

    def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
               index=False, header=True, st_ascend=None, ascend=None, freq=None):
        """
        Write ``df`` to csv. Nothing is written for an empty frame.

        path: optional sub folder below the default export directory self.output
        st_ascend: sort columns, e.g. ['column1', 'column2']
        ascend: sort direction(s), e.g. [True, False]
        freq: when given, one file per period of the 日期 column is written into
            a sub folder named after ``filename`` (freq='Y' or freq='M')
        """
        if len(df) == 0:
            return
        path = self._export_dir(path)
        self._sort_in_place(df, filename, st_ascend, ascend)
        if not freq:
            df.to_csv(os.path.join(path, filename + '.csv'), encoding=encoding, index=index, header=header)
            return
        if '日期' not in df.columns:
            print(f'{filename}: 数据缺少日期列,无法按日期分组')
            return
        folder = os.path.join(path, filename)
        for period, part in df.groupby(pd.Grouper(key='日期', freq=freq)):
            if part.empty:
                continue  # periods without rows are skipped, like an empty frame above
            if freq in ('M', 'ME'):
                sheet_name = period.strftime('%Y-%m')
            elif freq in ('Y', 'YE'):
                sheet_name = period.strftime('%Y年')
            else:
                # NOTE(review): every period of any other freq shares this one file name
                sheet_name = '_未分类'
            os.makedirs(folder, exist_ok=True)
            if self._should_sort(st_ascend, ascend):
                # Re-sort each slice so that it starts with a fresh 0..n index
                try:
                    part = part.sort_values(st_ascend, ascending=ascend, ignore_index=True)
                except Exception:
                    print(f'{filename}: sort_values排序参数错误!')
            part.to_csv(os.path.join(folder, f'{filename}{sheet_name}.csv'),
                        encoding=encoding, index=index, header=header)

    def as_json(self, df, filename, path=None, orient='records', force_ascii=False, st_ascend=None, ascend=None):
        """ Write ``df`` to <filename>.json; see as_csv for path / st_ascend / ascend. """
        if len(df) == 0:
            return
        path = self._export_dir(path)
        self._sort_in_place(df, filename, st_ascend, ascend)
        df.to_json(os.path.join(path, filename + '.json'),
                   orient=orient, force_ascii=force_ascii)

    def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
                 freeze_panes=(1, 0), st_ascend=None, ascend=None):
        """ Write ``df`` to <filename>.xlsx; see as_csv for path / st_ascend / ascend. """
        if len(df) == 0:
            return
        path = self._export_dir(path)
        self._sort_in_place(df, filename, st_ascend, ascend)
        df.to_excel(os.path.join(path, filename + '.xlsx'),
                    index=index, header=header, engine=engine, freeze_panes=freeze_panes)
303
+
304
+
305
def main():
    """ Download this month's 宝贝指标 data from the home_lx MySQL server, aggregate it and export it as csv. """
    query = MysqlDatasQuery(target_service='home_lx')
    query.months = 0  # current month only

    # The promotion report follows the same steps with query.tg_wxt() and
    # the table name '推广数据_宝贝主体报表'.
    table = '宝贝指标'
    raw = query.syj()  # fetch the data from the database as a DataFrame
    exporter = GroupBy()
    grouped = exporter.groupby(df=raw, tabel_name=table, is_maximize=True)  # aggregate
    exporter.as_csv(df=grouped, filename=table)  # export


if __name__ == '__main__':
    main()
@@ -0,0 +1,120 @@
1
+ # -*- coding:utf-8 -*-
2
+ import datetime
3
+ import platform
4
+ import re
5
+ import time
6
+ from functools import wraps
7
+ import warnings
8
+ import pymysql
9
+ import numpy as np
10
+ import pandas as pd
11
+ from sqlalchemy import create_engine
12
+ import os
13
+ import calendar
14
+ from mdbq.config import get_myconf
15
+
16
+ warnings.filterwarnings('ignore')
17
+
18
+
19
class QueryDatas:
    """ Read MySQL tables into pandas DataFrames. """
    def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
        self.username = username
        self.password = password
        self.host = host
        self.port = port
        # Connection settings without a database; each call adds the database it needs
        self.config = {
            'host': self.host,
            'port': self.port,
            'user': self.username,
            'password': self.password,
            'charset': charset,  # utf8mb4 stores four-byte UTF-8 characters
            'cursorclass': pymysql.cursors.DictCursor,
        }

    @staticmethod
    def _quote(identifier) -> str:
        """ Backtick-quote a database / table / column name for use in SQL. """
        return '`' + str(identifier).replace('`', '``') + '`'

    @staticmethod
    def _build_query(db_name, tabel_name, cols_exist, projection, start_date, end_date):
        """
        Build the SELECT statement for data_to_df.

        cols_exist: column names that exist in the table
        projection: {column: 1} mapping (or a plain iterable of column names);
            columns missing from the table are dropped
        Returns (sql, params). sql is None when a projection was given but none
        of its columns exist. The date filter is applied whenever the table has
        a 日期 column, whether or not columns were requested.
        """
        has_date = '日期' in cols_exist
        if projection:
            if isinstance(projection, dict):
                wanted = [key for key, value in projection.items() if value == 1]
            else:
                wanted = list(projection)
            selected = [col for col in wanted if col in cols_exist]
            if not selected:
                return None, ()
        elif has_date:
            selected = list(cols_exist)
        else:
            selected = []  # no projection and no date column: everything
        quote = QueryDatas._quote
        column_sql = ', '.join(quote(col) for col in selected) if selected else '*'
        sql = f'SELECT {column_sql} FROM {quote(db_name)}.{quote(tabel_name)}'
        if not has_date:
            return sql, ()
        # The statement is %-formatted by the driver once parameters are passed,
        # so literal percent signs inside identifiers have to be doubled
        sql = sql.replace('%', '%%')
        return sql + f" WHERE {quote('日期')} BETWEEN %s AND %s", (start_date, end_date)

    def data_to_df(self, db_name, tabel_name, start_date, end_date, projection: dict = None):
        """
        Return the rows of ``db_name``.``tabel_name`` as a DataFrame.

        start_date / end_date: inclusive bounds applied to the 日期 column when
            the table has one; ignored otherwise.
        projection: {column: 1} mapping of the columns to fetch; None or empty
            fetches all columns.
        An empty DataFrame is returned when the database or table does not
        exist, when the query fails, or when no row matches.
        """
        start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
        end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
        df = pd.DataFrame()

        if not self.check_infos(db_name, tabel_name):
            return df

        connection = pymysql.connect(**{**self.config, 'database': db_name})
        try:
            with connection.cursor() as cursor:
                # Look up the table's columns so that unknown projection keys can be dropped
                cursor.execute(
                    'SELECT COLUMN_NAME FROM information_schema.columns '
                    'WHERE table_schema = %s AND table_name = %s ORDER BY ORDINAL_POSITION',
                    (db_name, tabel_name),
                )
                cols_exist = [col['COLUMN_NAME'] for col in cursor.fetchall()]

                sql, params = self._build_query(
                    db_name, tabel_name, cols_exist, projection, start_date, end_date)
                if sql is None:
                    print(f'database: {db_name}, table: {tabel_name} 查询的数据为空')
                    return df
                # Pass None instead of an empty tuple so the driver skips %-formatting
                cursor.execute(sql, params or None)
                rows = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description]
                df = pd.DataFrame(rows, columns=columns)
        except Exception as e:
            print(f'{e}')
            return df
        finally:
            connection.close()

        if len(df) == 0:
            print(f'database: {db_name}, table: {tabel_name} 查询的数据为空')
        return df

    def check_infos(self, db_name, tabel_name) -> bool:
        """ Return True when both the database and the table exist. """
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                # Exact-match lookups: SHOW ... LIKE would treat "_" in a name as a wildcard
                cursor.execute(
                    'SELECT 1 FROM information_schema.schemata WHERE schema_name = %s',
                    (db_name,),
                )
                if not cursor.fetchone():
                    print(f"Database <{db_name}>: 数据库不存在")
                    return False
                cursor.execute(
                    'SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s',
                    (db_name, tabel_name),
                )
                if not cursor.fetchone():
                    print(f'{db_name} -> <{tabel_name}>: 表不存在')
                    return False
                return True
        except Exception as e:
            print(e)
            return False
        finally:
            connection.close()
116
+
117
+
118
if __name__ == '__main__':
    # Manual check that the stored credentials can be read; the password itself is not echoed
    username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
    print(username, '***', host, port)
@@ -0,0 +1,38 @@
1
+ # -*- coding:utf-8 -*-
2
+ import warnings
3
+ import pandas as pd
4
+ import calendar
5
+
6
+ warnings.filterwarnings('ignore')
7
+
8
+
9
def year_month_day(start_date, end_date):
    """
    List every calendar month between start_date and end_date.

    start_date is moved back to the 1st of its month, so a date in the middle
    of a month still yields that month.
    calendar.monthrange returns the weekday of the 1st (0-6) and the number of
    days of the month; only the day count is used here.
    Returns: [{'起始日期': '2025-05-01', '结束日期': '2025-05-31'}, {'起始日期': '2025-06-01', '结束日期': '2025-06-30'}]
    An empty list is returned when either date cannot be parsed.
    """
    try:
        first = pd.to_datetime(start_date)
        # freq='MS' yields the first day of every month in the range
        month_starts = pd.date_range(
            start=pd.Timestamp(year=first.year, month=first.month, day=1),
            end=end_date,
            freq='MS',
        )
    except Exception as e:
        print(e)
        return []

    results = []
    for month_start in month_starts:
        _, days_in_month = calendar.monthrange(month_start.year, month_start.month)
        year_month = month_start.strftime('%Y-%m')
        results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{days_in_month}'})

    return results  # one entry per month between start_date and end_date


if __name__ == '__main__':
    results = year_month_day(start_date='2025-05-01', end_date='2025-08-01')
    print(results)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -25,6 +25,7 @@ mdbq/mongo/mongo.py
25
25
  mdbq/mysql/__init__.py
26
26
  mdbq/mysql/mysql.py
27
27
  mdbq/mysql/s_query.py
28
+ mdbq/mysql/year_month_day.py
28
29
  mdbq/other/__init__.py
29
30
  mdbq/other/porxy.py
30
31
  mdbq/other/pov_city.py
@@ -3,7 +3,7 @@
3
3
  from setuptools import setup, find_packages
4
4
 
5
5
  setup(name='mdbq',
6
- version='0.0.6',
6
+ version='0.0.8',
7
7
  author='xigua, ',
8
8
  author_email="2587125111@qq.com",
9
9
  url='https://pypi.org/project/mdbsql',
@@ -1,307 +0,0 @@
1
- # -*- coding: UTF-8 –*-
2
- from mdbq.mongo import mongo
3
- from mdbq.mysql import s_query
4
- from mdbq.config import get_myconf
5
- import datetime
6
- from dateutil.relativedelta import relativedelta
7
- import pandas as pd
8
- import numpy as np
9
- import platform
10
- import getpass
11
- import json
12
- import os
13
-
14
-
15
- class MongoDatasQuery:
16
- """
17
- 从 数据库 中下载数据
18
- self.output: 数据库默认导出目录
19
- self.is_maximize: 是否最大转化数据
20
- """
21
- def __init__(self, target_service):
22
- # target_service 从哪个服务器下载数据
23
- self.is_maximize = True
24
- if platform.system() == 'Darwin':
25
- self.output = os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
26
- elif platform.system() == 'Windows':
27
- self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
28
- else:
29
- self.output = os.path.join('数据中心/数据库导出')
30
-
31
- # 实例化一个下载类
32
- username, password, host, port = get_myconf.select_config_values(target_service=target_service, database='mongodb')
33
- self.download = mongo.DownMongo(username=username, password=password, host=host, port=port, save_path=None)
34
-
35
- def tg_wxt(self):
36
- self.download.start_date, self.download.end_date = self.months_data(num=1)
37
- projection = {
38
- '日期': 1,
39
- '场景名字': 1,
40
- '主体id': 1,
41
- '花费': 1,
42
- '展现量': 1,
43
- '点击量': 1,
44
- '总购物车数': 1,
45
- '总成交笔数': 1,
46
- '总成交金额': 1,
47
- '自然流量曝光量': 1,
48
- '直接成交笔数': 1,
49
- '直接成交金额': 1,
50
- }
51
- df = self.download.data_to_df(db_name='天猫数据2', collection_name='推广数据_宝贝主体报表', projection=projection)
52
- df.rename(columns={
53
- '场景名字': '营销场景',
54
- '主体id': '商品id',
55
- '总购物车数': '加购量',
56
- '总成交笔数': '成交笔数',
57
- '总成交金额': '成交金额'
58
- }, inplace=True)
59
- df = df.astype({
60
- '花费': float,
61
- '展现量': int,
62
- '点击量': int,
63
- '加购量': int,
64
- '成交笔数': int,
65
- '成交金额': float,
66
- '自然流量曝光量': int,
67
- '直接成交笔数': int,
68
- '直接成交金额': float,
69
- }, errors='raise')
70
- df.fillna(0, inplace=True)
71
- if self.is_maximize:
72
- df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
73
- **{'加购量': ('加购量', np.max),
74
- '成交笔数': ('成交笔数', np.max),
75
- '成交金额': ('成交金额', np.max),
76
- '自然流量曝光量': ('自然流量曝光量', np.max),
77
- '直接成交笔数': ('直接成交笔数', np.max),
78
- '直接成交金额': ('直接成交金额', np.max)
79
- }
80
- )
81
- else:
82
- df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
83
- **{'加购量': ('加购量', np.min),
84
- '成交笔数': ('成交笔数', np.min),
85
- '成交金额': ('成交金额', np.min),
86
- '自然流量曝光量': ('自然流量曝光量', np.min),
87
- '直接成交笔数': ('直接成交笔数', np.max),
88
- '直接成交金额': ('直接成交金额', np.max)
89
- }
90
- )
91
- df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
92
- # print(df)
93
- return df
94
-
95
- @staticmethod
96
- def days_data(days, end_date=None):
97
- """ 读取近 days 天的数据 """
98
- if not end_date:
99
- end_date = datetime.datetime.now()
100
- start_date = end_date - datetime.timedelta(days=days)
101
- return pd.to_datetime(start_date), pd.to_datetime(end_date)
102
-
103
- @staticmethod
104
- def months_data(num=0, end_date=None):
105
- """ 读取近 num 个月的数据, 0 表示读取当月的数据 """
106
- if not end_date:
107
- end_date = datetime.datetime.now()
108
- start_date = end_date - relativedelta(months=num) # n 月以前的今天
109
- start_date = f'{start_date.year}-{start_date.month}-01' # 替换为 n 月以前的第一天
110
- return pd.to_datetime(start_date), pd.to_datetime(end_date)
111
-
112
- def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
113
- index=False, header=True, st_ascend=None, ascend=None, freq=None):
114
- """
115
- path: 子文件夹,可以不传,默认导出目录 self.output
116
- st_ascend: 排序参数
117
- ascend: 升降序
118
- freq: 将创建子文件夹并按月分类存储, freq='Y',或 freq='M'
119
- """
120
- if not path:
121
- path = self.output
122
- else:
123
- path = os.path.join(self.output, path)
124
- if not os.path.exists(path):
125
- os.makedirs(path)
126
- if st_ascend and ascend:
127
- try:
128
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
129
- except:
130
- print(f'{filename}: sort_values排序参数错误!')
131
- if freq:
132
- if '日期' not in df.columns.tolist():
133
- return print(f'{filename}: 数据缺少日期列,无法按日期分组')
134
- groups = df.groupby(pd.Grouper(key='日期', freq=freq))
135
- for name1, df in groups:
136
- if freq == 'M':
137
- sheet_name = name1.strftime('%Y-%m')
138
- elif freq == 'Y':
139
- sheet_name = name1.strftime('%Y年')
140
- else:
141
- sheet_name = '_未分类'
142
- new_path = os.path.join(path, filename)
143
- if not os.path.exists(new_path):
144
- os.makedirs(new_path)
145
- new_path = os.path.join(new_path, f'{filename}{sheet_name}.csv')
146
- if st_ascend and ascend: # 这里需要重新排序一次,原因未知
147
- try:
148
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
149
- except:
150
- print(f'{filename}: sort_values排序参数错误!')
151
-
152
- df.to_csv(new_path, encoding=encoding, index=index, header=header)
153
- else:
154
- df.to_csv(os.path.join(path, filename + '.csv'), encoding=encoding, index=index, header=header)
155
-
156
- def as_json(self, df, filename, path=None, orient='records', force_ascii=False, st_ascend=None, ascend=None):
157
- if not path:
158
- path = self.output
159
- else:
160
- path = os.path.join(self.output, path)
161
- if st_ascend and ascend:
162
- try:
163
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
164
- except:
165
- print(f'{filename}: sort_values排序参数错误!')
166
- df.to_json(os.path.join(path, filename + '.json'),
167
- orient=orient, force_ascii=force_ascii)
168
-
169
- def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
170
- freeze_panes=(1, 0), st_ascend=None, ascend=None):
171
- if not path:
172
- path = self.output
173
- else:
174
- path = os.path.join(self.output, path)
175
- if st_ascend and ascend:
176
- try:
177
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
178
- except:
179
- print(f'{filename}: sort_values排序参数错误!')
180
- df.to_excel(os.path.join(path, filename + '.xlsx'),
181
- index=index, header=header, engine=engine, freeze_panes=freeze_panes)
182
-
183
-
184
- class MysqlDatasQuery:
185
- """
186
- 从 数据库 中下载数据
187
- self.output: 数据库默认导出目录
188
- self.is_maximize: 是否最大转化数据
189
- """
190
- def __init__(self, target_service):
191
- # target_service 从哪个服务器下载数据
192
- self.is_maximize = True
193
- if platform.system() == 'Darwin':
194
- self.output = os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
195
- elif platform.system() == 'Windows':
196
- self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
197
- else:
198
- self.output = os.path.join('数据中心/数据库导出')
199
- self.months = 1 # 下载几个月数据, 0 表示当月, 1 是上月 1 号至今
200
-
201
- # 实例化一个下载类
202
- username, password, host, port = get_myconf.select_config_values(target_service=target_service, database='mysql')
203
- self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
204
-
205
- def tg_wxt(self):
206
- start_date, end_date = self.months_data(num=self.months)
207
- df = self.download.data_to_df(db_name='天猫数据2', tabel_name='推广数据_宝贝主体报表', start_date=start_date, end_date=end_date)
208
- return df
209
-
210
- @staticmethod
211
- def days_data(days, end_date=None):
212
- """ 读取近 days 天的数据 """
213
- if not end_date:
214
- end_date = datetime.datetime.now()
215
- start_date = end_date - datetime.timedelta(days=days)
216
- return pd.to_datetime(start_date), pd.to_datetime(end_date)
217
-
218
- @staticmethod
219
- def months_data(num=0, end_date=None):
220
- """ 读取近 num 个月的数据, 0 表示读取当月的数据 """
221
- if not end_date:
222
- end_date = datetime.datetime.now()
223
- start_date = end_date - relativedelta(months=num) # n 月以前的今天
224
- start_date = f'{start_date.year}-{start_date.month}-01' # 替换为 n 月以前的第一天
225
- return pd.to_datetime(start_date), pd.to_datetime(end_date)
226
-
227
- def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
228
- index=False, header=True, st_ascend=None, ascend=None, freq=None):
229
- """
230
- path: 子文件夹,可以不传,默认导出目录 self.output
231
- st_ascend: 排序参数
232
- ascend: 升降序
233
- freq: 将创建子文件夹并按月分类存储, freq='Y',或 freq='M'
234
- """
235
- if not path:
236
- path = self.output
237
- else:
238
- path = os.path.join(self.output, path)
239
- if not os.path.exists(path):
240
- os.makedirs(path)
241
- if st_ascend and ascend:
242
- try:
243
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
244
- except:
245
- print(f'{filename}: sort_values排序参数错误!')
246
- if freq:
247
- if '日期' not in df.columns.tolist():
248
- return print(f'{filename}: 数据缺少日期列,无法按日期分组')
249
- groups = df.groupby(pd.Grouper(key='日期', freq=freq))
250
- for name1, df in groups:
251
- if freq == 'M':
252
- sheet_name = name1.strftime('%Y-%m')
253
- elif freq == 'Y':
254
- sheet_name = name1.strftime('%Y年')
255
- else:
256
- sheet_name = '_未分类'
257
- new_path = os.path.join(path, filename)
258
- if not os.path.exists(new_path):
259
- os.makedirs(new_path)
260
- new_path = os.path.join(new_path, f'{filename}{sheet_name}.csv')
261
- if st_ascend and ascend: # 这里需要重新排序一次,原因未知
262
- try:
263
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
264
- except:
265
- print(f'{filename}: sort_values排序参数错误!')
266
-
267
- df.to_csv(new_path, encoding=encoding, index=index, header=header)
268
- else:
269
- df.to_csv(os.path.join(path, filename + '.csv'), encoding=encoding, index=index, header=header)
270
-
271
- def as_json(self, df, filename, path=None, orient='records', force_ascii=False, st_ascend=None, ascend=None):
272
- if not path:
273
- path = self.output
274
- else:
275
- path = os.path.join(self.output, path)
276
- if st_ascend and ascend:
277
- try:
278
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
279
- except:
280
- print(f'{filename}: sort_values排序参数错误!')
281
- df.to_json(os.path.join(path, filename + '.json'),
282
- orient=orient, force_ascii=force_ascii)
283
-
284
- def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
285
- freeze_panes=(1, 0), st_ascend=None, ascend=None):
286
- if not path:
287
- path = self.output
288
- else:
289
- path = os.path.join(self.output, path)
290
- if st_ascend and ascend:
291
- try:
292
- df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
293
- except:
294
- print(f'{filename}: sort_values排序参数错误!')
295
- df.to_excel(os.path.join(path, filename + '.xlsx'),
296
- index=index, header=header, engine=engine, freeze_panes=freeze_panes)
297
-
298
-
299
- def main():
300
- sdq = MysqlDatasQuery(target_service='company')
301
- sdq.months = 0
302
- df = sdq.tg_wxt()
303
- print(df)
304
-
305
-
306
- if __name__ == '__main__':
307
- main()
@@ -1,165 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- import datetime
3
- import platform
4
- import re
5
- import time
6
- from functools import wraps
7
- import warnings
8
- import pymysql
9
- import numpy as np
10
- import pandas as pd
11
- from sqlalchemy import create_engine
12
- import os
13
- import calendar
14
- from mdbq.config import get_myconf
15
-
16
- warnings.filterwarnings('ignore')
17
-
18
-
19
class QueryDatas:
    """Read rows of a MySQL table into a pandas DataFrame through pymysql."""

    def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
        """Store the credentials and build the base pymysql connection settings.

        Args:
            username: MySQL user name.
            password: MySQL password.
            host: Server host.
            port: Server port.
            charset: Connection charset; utf8mb4 supports four-byte UTF-8 characters.
        """
        self.username = username
        self.password = password
        self.host = host
        self.port = port
        self.config = {
            'host': self.host,
            'port': self.port,
            'user': self.username,
            'password': self.password,
            'charset': charset,  # utf8mb4 supports four-byte UTF-8 characters
            'cursorclass': pymysql.cursors.DictCursor,
        }

    @staticmethod
    def _quote_identifier(name):
        """Backtick-quote a MySQL identifier, doubling any embedded backtick."""
        return '`' + str(name).replace('`', '``') + '`'

    def data_to_df(self, db_name, tabel_name, start_date, end_date, projection=None):
        """Load a table, optionally restricted to some columns and a date range.

        Args:
            db_name: Database (schema) name.
            tabel_name: Table name (parameter spelling kept for existing callers).
            start_date: Lower bound for the ``日期`` column; anything
                ``pd.to_datetime`` accepts.
            end_date: Upper bound (inclusive, SQL ``BETWEEN``) for ``日期``.
            projection: Iterable of column names to fetch (a dict contributes its
                keys). Names missing from the table are dropped. When empty or
                omitted, every column is fetched. The caller's object is never
                modified.

        Returns:
            The matching rows as a DataFrame. An empty DataFrame is returned when
            the database or table does not exist, when the query fails, or when
            no row matches. The date filter is applied when ``日期`` is among the
            requested columns, or when no projection is given and the table has
            a ``日期`` column.
        """
        start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
        end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
        df = pd.DataFrame()
        # Work on a private copy: no shared mutable default, no caller-side mutation.
        wanted = list(projection) if projection else []

        # 1. Check that the database exists, using a connection without a default schema.
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                cursor.execute('SHOW DATABASES LIKE %s', (db_name,))
                database_exists = cursor.fetchone()
        finally:
            connection.close()  # this connection must be closed before reconnecting
        if not database_exists:
            print(f"Database <{db_name}>: 数据库不存在")
            # Connecting to a missing schema would only raise; report and stop here.
            return df
        time.sleep(0.2)  # pause kept from the original implementation; reason not documented

        # 2. Reconnect with the schema selected. A local copy keeps self.config
        #    free of per-call state.
        connection = pymysql.connect(**dict(self.config, database=db_name))
        try:
            with connection.cursor() as cursor:
                cursor.execute('SHOW TABLES LIKE %s', (tabel_name,))
                if not cursor.fetchone():
                    print(f'{db_name} -> <{tabel_name}>: 表不存在')
                    return df

                # Fetch the table's columns once; used both to validate the
                # projection and to decide whether a date filter is possible.
                cursor.execute(
                    'SELECT COLUMN_NAME FROM information_schema.columns '
                    'WHERE table_schema = %s AND table_name = %s',
                    (db_name, tabel_name),
                )
                cols_exist = [row['COLUMN_NAME'] for row in cursor.fetchall()]
                known = {str(col).lower() for col in cols_exist}
                # Drop requested columns that the table does not have.
                wanted = [col for col in wanted if str(col).lower() in known]

                if wanted:
                    select_list = ', '.join(self._quote_identifier(col) for col in wanted)
                    filter_by_date = '日期' in wanted
                else:
                    select_list = '*'
                    filter_by_date = '日期' in cols_exist

                sql = (f'SELECT {select_list} FROM '
                       f'{self._quote_identifier(db_name)}.{self._quote_identifier(tabel_name)}')
                if filter_by_date:
                    # pymysql %-formats the statement when arguments are given,
                    # so a literal % inside an identifier has to be doubled.
                    sql = sql.replace('%', '%%') + ' WHERE `日期` BETWEEN %s AND %s'
                    cursor.execute(sql, (start_date, end_date))
                else:
                    cursor.execute(sql)
                rows = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description]
                df = pd.DataFrame(rows, columns=columns)
        except Exception as e:
            # The table was already confirmed to exist, so report the real error.
            print(f'{db_name} -> <{tabel_name}>: 查询失败: {e}')
            return df
        finally:
            connection.close()

        if df.empty:
            print(f'database: {db_name}, table: {tabel_name} 查询的数据为空')
        return df
111
-
112
-
113
def year_month_day(start_date, end_date):
    """
    List every calendar month from the month of start_date up to end_date.

    The start is snapped to the first day of its month, so a range that lies
    inside a single month still yields that month.

    Args:
        start_date: Anything ``pd.to_datetime`` accepts.
        end_date: Anything ``pd.to_datetime`` accepts (for example ``'today'``).

    Returns:
        A list of ``{'起始日期': 'YYYY-MM-01', '结束日期': 'YYYY-MM-DD'}`` dicts,
        one per month, where the end date is the last day of that month as
        reported by ``calendar.monthrange``. An empty list is returned when
        either bound cannot be interpreted as a date or when the range is empty.
    """
    try:
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)
        if pd.isna(start) or pd.isna(end):
            raise ValueError(f'invalid date range: {start_date!r} -> {end_date!r}')
        # Replace the day with 01 so the starting month is always included.
        first_day = pd.Timestamp(year=start.year, month=start.month, day=1)
        # 'MS' = month start: one timestamp per month in the range.
        month_starts = pd.date_range(start=first_day, end=end, freq='MS')
    except Exception as e:
        print(e)
        return []

    results = []
    for month_start in month_starts:
        year_month = month_start.strftime('%Y-%m')
        # monthrange returns (weekday of the 1st, number of days in the month).
        last_day = calendar.monthrange(month_start.year, month_start.month)[1]
        results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{last_day}'})
    return results
137
-
138
-
139
def download_datas(tabel_name, save_path, start_date):
    """Export one table of the '市场数据2' database to monthly CSV files.

    For every month from the month of ``start_date`` up to today, the rows are
    read through ``QueryDatas.data_to_df`` and written to
    ``<save_path>/<tabel_name>_<month start>_<month end>.csv``. Months that
    return no rows are skipped.

    Args:
        tabel_name: Table to export (parameter spelling kept for existing callers).
        save_path: Directory that receives the CSV files.
        start_date: First date of interest; anything ``year_month_day`` accepts.
    """
    username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
    # The credentials are deliberately not printed: that would leak the
    # database password to stdout and any captured logs.
    # QueryDatas is the class in this module that provides data_to_df.
    m = QueryDatas(username=username, password=password, host=host, port=port)
    results = year_month_day(start_date=start_date, end_date='today')
    for result in results:
        month_start = result['起始日期']
        month_end = result['结束日期']
        df = m.data_to_df(db_name='市场数据2', tabel_name=tabel_name, start_date=month_start, end_date=month_end)
        if len(df) == 0:
            continue
        path = os.path.join(save_path, f'{tabel_name}_{str(month_start)}_{str(month_end)}.csv')
        if '日期' in df.columns:
            # Keep only the date part: drop everything after the first space.
            df['日期'] = df['日期'].apply(lambda x: re.sub(' .*', '', str(x)))
        df.to_csv(path, index=False, encoding='utf-8_sig', header=True)
156
-
157
-
158
if __name__ == '__main__':
    # Manual smoke test: read one table for a fixed date range and print it.
    username, password, host, port = get_myconf.select_config_values(
        target_service='company',
        database='mysql',
    )
    qd = QueryDatas(username=username, password=password, host=host, port=port)
    df = qd.data_to_df(
        db_name='市场数据2',
        tabel_name='市场排行_店铺',
        start_date='2024-08-13',
        end_date='2024-08-31',
    )
    print(df)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes