mdbq 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py ADDED
@@ -0,0 +1,3 @@
+ VERSION = (0, 3, 6)
+
+ __version__ = '.'.join(map(str, VERSION))
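
A note on the version tuple: it reads 0.3.6 here, while the wheel METADATA below says 0.0.6. A minimal sketch of how the new module is consumed (import path as listed in the RECORD below):

    from mdbq.__version__ import VERSION, __version__

    assert __version__ == '.'.join(map(str, VERSION))
    print(__version__)  # -> '0.3.6'
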
mdbq/aggregation/aggregation.py CHANGED
@@ -960,7 +960,22 @@ def main():
      # print(d.datas)


+ def update_dtype():
+     """ Update one file's dtype info into the json file """
+     file = '/Users/xigua/数据中心/原始文件2/月数据/流量来源/【生意参谋平台】无线店铺流量来源-2023-04-01_2023-04-30.csv'
+     df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
+     d = DataTypes()
+     d.read_dtypes(
+         df=df,
+         db_name='生意参谋数据2',
+         collection_name='店铺来源_月数据',
+         is_file_dtype=False,  # don't give the file's existing dtypes priority
+     )
+     d.dtypes_to_file()
+
+
  def upload():
+     """ Upload a folder to the database """
      path = '/Users/xigua/数据中心/原始文件2/生意参谋/客户_客户概况_画像'
      db_name = '生意参谋数据2'
      collection_name = '客户_客户概况_画像'
@@ -995,7 +1010,7 @@ def upload():
      # print(dtypes)
      for root, dirs, files in os.walk(path, topdown=False):
          for name in files:
-             if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
+             if '~$' in name or '.DS' in name or '.localized' in name or 'baidu' in name:
                  continue
              if name.endswith('.csv'):
                  # print(name)
@@ -1005,7 +1020,7 @@ def upload():
                  continue
              for col in df.columns.tolist():
                  df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
-             df.replace(to_replace=['--'], value='', regex=False, inplace=True)
+             # df.replace(to_replace=['--'], value='', regex=False, inplace=True)
              df = dt.convert_df_cols(df=df)
              try:
                  df = df.astype(dtypes)
@@ -1032,29 +1047,5 @@ if __name__ == '__main__':
      username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
      print(username, password, host, port)

-     d = DatabaseUpdate(path='/Users/xigua/Downloads')
-     d.upload_df(service_databases=[{'home_lx': 'mongodb'},])
-
      # main()
      # upload()
-     # path = '/Users/xigua/数据中心/原始文件2/月数据/流量来源-自助取数-月数据'
-     # for root, dirs, files in os.walk(path, topdown=False):
-     #     for name in files:
-     #         if name.endswith('.csv') and 'baidu' not in name:
-     #             with open(os.path.join(root, name), 'rb') as f:
-     #                 f1 = f.read()
-     #                 encod = chardet.detect(f1).get('encoding')
-     #                 print(name, encod)
-     #                 # df = pd.read_csv(os.path.join(root, name), encoding=encod, header=0, na_filter=False)
-     #                 # df.to_csv(os.path.join(root, name), index=False, encoding='utf-8_sig', header=True)
-
-     # file = '/Users/xigua/数据中心/原始文件2/月数据/流量来源/【生意参谋平台】无线店铺流量来源-2023-04-01_2023-04-30.csv'
-     # df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
-     # d = DataTypes()
-     # d.read_dtypes(
-     #     df=df,
-     #     db_name='生意参谋数据2',
-     #     collection_name='店铺来源_月数据',
-     #     is_file_dtype=False,  # don't give the file's existing dtypes priority
-     # )
-     # d.dtypes_to_file()
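
The re.sub('[="]', '', ...) pass in upload() above strips the Excel-style quoting (="...") that some export tools wrap around long numeric IDs to stop spreadsheets from truncating them; a minimal sketch of the transform (the sample value is hypothetical):

    import re

    raw = '="100023456"'
    cleaned = re.sub('[="]', '', str(raw)) if '="' in str(raw) else raw
    print(cleaned)  # -> 100023456
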
mdbq/aggregation/query_data.py ADDED
@@ -0,0 +1,307 @@
+ # -*- coding: UTF-8 -*-
+ from mdbq.mongo import mongo
+ from mdbq.mysql import s_query
+ from mdbq.config import get_myconf
+ import datetime
+ from dateutil.relativedelta import relativedelta
+ import pandas as pd
+ import numpy as np
+ import platform
+ import getpass
+ import json
+ import os
+
+
+ class MongoDatasQuery:
+     """
+     Download data from the database (MongoDB)
+     self.output: default export directory
+     self.is_maximize: whether to keep the maximum conversion figures
+     """
+     def __init__(self, target_service):
+         # target_service: which server to download from
+         self.is_maximize = True
+         if platform.system() == 'Darwin':
+             self.output = os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
+         elif platform.system() == 'Windows':
+             self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
+         else:
+             self.output = os.path.join('数据中心/数据库导出')
+
+         # instantiate a downloader
+         username, password, host, port = get_myconf.select_config_values(target_service=target_service, database='mongodb')
+         self.download = mongo.DownMongo(username=username, password=password, host=host, port=port, save_path=None)
+
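+     # tg_wxt below builds a MongoDB projection dict: a value of 1 means
+     # "include this field in the returned documents"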
+     def tg_wxt(self):
+         self.download.start_date, self.download.end_date = self.months_data(num=1)
+         projection = {
+             '日期': 1,
+             '场景名字': 1,
+             '主体id': 1,
+             '花费': 1,
+             '展现量': 1,
+             '点击量': 1,
+             '总购物车数': 1,
+             '总成交笔数': 1,
+             '总成交金额': 1,
+             '自然流量曝光量': 1,
+             '直接成交笔数': 1,
+             '直接成交金额': 1,
+         }
+         df = self.download.data_to_df(db_name='天猫数据2', collection_name='推广数据_宝贝主体报表', projection=projection)
+         df.rename(columns={
+             '场景名字': '营销场景',
+             '主体id': '商品id',
+             '总购物车数': '加购量',
+             '总成交笔数': '成交笔数',
+             '总成交金额': '成交金额'
+         }, inplace=True)
+         df = df.astype({
+             '花费': float,
+             '展现量': int,
+             '点击量': int,
+             '加购量': int,
+             '成交笔数': int,
+             '成交金额': float,
+             '自然流量曝光量': int,
+             '直接成交笔数': int,
+             '直接成交金额': float,
+         }, errors='raise')
+         df.fillna(0, inplace=True)
+         if self.is_maximize:
+             df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
+                 **{'加购量': ('加购量', np.max),
+                    '成交笔数': ('成交笔数', np.max),
+                    '成交金额': ('成交金额', np.max),
+                    '自然流量曝光量': ('自然流量曝光量', np.max),
+                    '直接成交笔数': ('直接成交笔数', np.max),
+                    '直接成交金额': ('直接成交金额', np.max)
+                    }
+             )
+         else:
+             df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
+                 **{'加购量': ('加购量', np.min),
+                    '成交笔数': ('成交笔数', np.min),
+                    '成交金额': ('成交金额', np.min),
+                    '自然流量曝光量': ('自然流量曝光量', np.min),
+                    '直接成交笔数': ('直接成交笔数', np.min),
+                    '直接成交金额': ('直接成交金额', np.min)
+                    }
+             )
+         df.insert(loc=1, column='推广渠道', value='万相台无界版')  # insert a new column into df
+         # print(df)
+         return df
+
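+     # note on tg_wxt above: the groupby merges duplicated report rows that share
+     # (日期, 营销场景, 商品id, 花费, 展现量, 点击量), and is_maximize decides whether
+     # the max or the min of the conversion columns is kept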
+     @staticmethod
+     def days_data(days, end_date=None):
+         """ Read the last `days` days of data """
+         if not end_date:
+             end_date = datetime.datetime.now()
+         start_date = end_date - datetime.timedelta(days=days)
+         return pd.to_datetime(start_date), pd.to_datetime(end_date)
+
+     @staticmethod
+     def months_data(num=0, end_date=None):
+         """ Read the last `num` months of data; 0 means the current month """
+         if not end_date:
+             end_date = datetime.datetime.now()
+         start_date = end_date - relativedelta(months=num)  # today, n months ago
+         start_date = f'{start_date.year}-{start_date.month}-01'  # replace with the first day of that month
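+         # worked example: num=1 on 2024-08-13 makes start_date '2024-7-01',
+         # so the returned window is 2024-07-01 .. 2024-08-13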
+         return pd.to_datetime(start_date), pd.to_datetime(end_date)
+
+     def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
+                index=False, header=True, st_ascend=None, ascend=None, freq=None):
+         """
+         path: optional subfolder; defaults to the export directory self.output
+         st_ascend: columns to sort by
+         ascend: ascending/descending flags
+         freq: create a subfolder and store the files split by period, freq='Y' or freq='M'
+         """
+         if not path:
+             path = self.output
+         else:
+             path = os.path.join(self.output, path)
+         if not os.path.exists(path):
+             os.makedirs(path)
+         if st_ascend and ascend:
+             try:
+                 df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+             except Exception:
+                 print(f'{filename}: invalid sort_values parameters!')
+         if freq:
+             if '日期' not in df.columns.tolist():
+                 return print(f'{filename}: no 日期 column, cannot group by date')
+             groups = df.groupby(pd.Grouper(key='日期', freq=freq))
+             for name1, df in groups:
+                 if freq == 'M':
+                     sheet_name = name1.strftime('%Y-%m')
+                 elif freq == 'Y':
+                     sheet_name = name1.strftime('%Y年')
+                 else:
+                     sheet_name = '_未分类'
+                 new_path = os.path.join(path, filename)
+                 if not os.path.exists(new_path):
+                     os.makedirs(new_path)
+                 new_path = os.path.join(new_path, f'{filename}{sheet_name}.csv')
+                 if st_ascend and ascend:  # has to be re-sorted here; reason unknown
+                     try:
+                         df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+                     except Exception:
+                         print(f'{filename}: invalid sort_values parameters!')
+
+                 df.to_csv(new_path, encoding=encoding, index=index, header=header)
+         else:
+             df.to_csv(os.path.join(path, filename + '.csv'), encoding=encoding, index=index, header=header)
+
+     def as_json(self, df, filename, path=None, orient='records', force_ascii=False, st_ascend=None, ascend=None):
+         if not path:
+             path = self.output
+         else:
+             path = os.path.join(self.output, path)
+         if st_ascend and ascend:
+             try:
+                 df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+             except Exception:
+                 print(f'{filename}: invalid sort_values parameters!')
+         df.to_json(os.path.join(path, filename + '.json'),
+                    orient=orient, force_ascii=force_ascii)
+
+     def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
+                  freeze_panes=(1, 0), st_ascend=None, ascend=None):
+         if not path:
+             path = self.output
+         else:
+             path = os.path.join(self.output, path)
+         if st_ascend and ascend:
+             try:
+                 df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+             except Exception:
+                 print(f'{filename}: invalid sort_values parameters!')
+         df.to_excel(os.path.join(path, filename + '.xlsx'),
+                     index=index, header=header, engine=engine, freeze_panes=freeze_panes)
+
+
+ class MysqlDatasQuery:
+     """
+     Download data from the database (MySQL)
+     self.output: default export directory
+     self.is_maximize: whether to keep the maximum conversion figures
+     """
+     def __init__(self, target_service):
+         # target_service: which server to download from
+         self.is_maximize = True
+         if platform.system() == 'Darwin':
+             self.output = os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
+         elif platform.system() == 'Windows':
+             self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
+         else:
+             self.output = os.path.join('数据中心/数据库导出')
+         self.months = 1  # how many months of data to download; 0 = the current month, 1 = from the 1st of last month until today
+
+         # instantiate a downloader
+         username, password, host, port = get_myconf.select_config_values(target_service=target_service, database='mysql')
+         self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+
+     def tg_wxt(self):
+         start_date, end_date = self.months_data(num=self.months)
+         df = self.download.data_to_df(db_name='天猫数据2', tabel_name='推广数据_宝贝主体报表', start_date=start_date, end_date=end_date)
+         return df
+
+     @staticmethod
+     def days_data(days, end_date=None):
+         """ Read the last `days` days of data """
+         if not end_date:
+             end_date = datetime.datetime.now()
+         start_date = end_date - datetime.timedelta(days=days)
+         return pd.to_datetime(start_date), pd.to_datetime(end_date)
+
+     @staticmethod
+     def months_data(num=0, end_date=None):
+         """ Read the last `num` months of data; 0 means the current month """
+         if not end_date:
+             end_date = datetime.datetime.now()
+         start_date = end_date - relativedelta(months=num)  # today, n months ago
+         start_date = f'{start_date.year}-{start_date.month}-01'  # replace with the first day of that month
+         return pd.to_datetime(start_date), pd.to_datetime(end_date)
+
+     def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
+                index=False, header=True, st_ascend=None, ascend=None, freq=None):
+         """
+         path: optional subfolder; defaults to the export directory self.output
+         st_ascend: columns to sort by
+         ascend: ascending/descending flags
+         freq: create a subfolder and store the files split by period, freq='Y' or freq='M'
+         """
+         if not path:
+             path = self.output
+         else:
+             path = os.path.join(self.output, path)
+         if not os.path.exists(path):
+             os.makedirs(path)
+         if st_ascend and ascend:
+             try:
+                 df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+             except Exception:
+                 print(f'{filename}: invalid sort_values parameters!')
+         if freq:
+             if '日期' not in df.columns.tolist():
+                 return print(f'{filename}: no 日期 column, cannot group by date')
+             groups = df.groupby(pd.Grouper(key='日期', freq=freq))
+             for name1, df in groups:
+                 if freq == 'M':
+                     sheet_name = name1.strftime('%Y-%m')
+                 elif freq == 'Y':
+                     sheet_name = name1.strftime('%Y年')
+                 else:
+                     sheet_name = '_未分类'
+                 new_path = os.path.join(path, filename)
+                 if not os.path.exists(new_path):
+                     os.makedirs(new_path)
+                 new_path = os.path.join(new_path, f'{filename}{sheet_name}.csv')
+                 if st_ascend and ascend:  # has to be re-sorted here; reason unknown
+                     try:
+                         df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+                     except Exception:
+                         print(f'{filename}: invalid sort_values parameters!')
+
+                 df.to_csv(new_path, encoding=encoding, index=index, header=header)
+         else:
+             df.to_csv(os.path.join(path, filename + '.csv'), encoding=encoding, index=index, header=header)
+
+     def as_json(self, df, filename, path=None, orient='records', force_ascii=False, st_ascend=None, ascend=None):
+         if not path:
+             path = self.output
+         else:
+             path = os.path.join(self.output, path)
+         if st_ascend and ascend:
+             try:
+                 df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+             except Exception:
+                 print(f'{filename}: invalid sort_values parameters!')
+         df.to_json(os.path.join(path, filename + '.json'),
+                    orient=orient, force_ascii=force_ascii)
+
+     def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
+                  freeze_panes=(1, 0), st_ascend=None, ascend=None):
+         if not path:
+             path = self.output
+         else:
+             path = os.path.join(self.output, path)
+         if st_ascend and ascend:
+             try:
+                 df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
+             except Exception:
+                 print(f'{filename}: invalid sort_values parameters!')
+         df.to_excel(os.path.join(path, filename + '.xlsx'),
+                     index=index, header=header, engine=engine, freeze_panes=freeze_panes)
+
+
+ def main():
+     sdq = MysqlDatasQuery(target_service='company')
+     sdq.months = 0
+     df = sdq.tg_wxt()
+     print(df)
+
+
+ if __name__ == '__main__':
+     main()
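
A usage sketch for the new module (hedged: 'home_lx' is a service name borrowed from the aggregation.py hunk above, and assumes a matching entry in get_myconf):

    from mdbq.aggregation import query_data

    q = query_data.MongoDatasQuery(target_service='home_lx')
    q.is_maximize = True  # keep the larger conversion figures per group
    df = q.tg_wxt()
    # export as one csv per month under the default export directory
    q.as_csv(df, filename='推广数据_宝贝主体报表', freq='M', st_ascend=['日期'], ascend=[True])
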
mdbq/company/copysh.py CHANGED
@@ -9,8 +9,8 @@ import datetime
  import shutil
  import time
  import re
- from mdbq.bdup import bd
- from mdbq.pipeline import database
+ from mdbq.bdup import bdup
+ from mdbq.aggregation import database
  from mdbq.config import update_conf
  warnings.filterwarnings('ignore')

@@ -293,10 +293,10 @@ def main():
      res, d_path = u.check_date()  # the ch_record value in the file decides whether to run the update
      if res:
          upload_path = f'windows/{str(datetime.date.today().strftime("%Y-%m"))}/{str(datetime.date.today())}'
-         b = bdup2.BaiDu()
+         b = bdup.BaiDu()
          b.download_dir(local_path=d_path, remote_path=upload_path)

          dp = database.DatabaseUpdate(path=d_path)
          dp.new_unzip(is_move=True)
          dp.cleaning(is_move=True)  # needs to be removed on the company desktop
          dp.upload_df(service_databases=[{'company': 'mysql'}])
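
Note on the second hunk: `from mdbq.aggregation import database` binds the name database, not aggregation, so the call site has to read database.DatabaseUpdate(...); referring to aggregation there would raise NameError, since only database was imported. A minimal sketch of the corrected pairing (the path argument is illustrative):

    from mdbq.bdup import bdup
    from mdbq.aggregation import database

    b = bdup.BaiDu()  # the module is bdup now, not bd
    dp = database.DatabaseUpdate(path='/tmp/downloads')
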
mdbq/mysql/s_query.py ADDED
@@ -0,0 +1,165 @@
+ # -*- coding:utf-8 -*-
+ import datetime
+ import platform
+ import re
+ import time
+ from functools import wraps
+ import warnings
+ import pymysql
+ import numpy as np
+ import pandas as pd
+ from sqlalchemy import create_engine
+ import os
+ import calendar
+ from mdbq.config import get_myconf
+
+ warnings.filterwarnings('ignore')
+
+
+ class QueryDatas:
+     def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
+         self.username = username
+         self.password = password
+         self.host = host
+         self.port = port
+         self.config = {
+             'host': self.host,
+             'port': self.port,
+             'user': self.username,
+             'password': self.password,
+             'charset': charset,  # utf8mb4 can store four-byte UTF-8 characters
+             'cursorclass': pymysql.cursors.DictCursor,
+         }
+
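+     # since cursorclass is DictCursor, fetchall() in data_to_df below returns
+     # a list of dicts, which pd.DataFrame() accepts directly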
+     def data_to_df(self, db_name, tabel_name, start_date, end_date, projection=None):
+         if projection is None:  # avoid a mutable default argument
+             projection = []
+         start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
+         end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
+         df = pd.DataFrame()
+
+         connection = pymysql.connect(**self.config)  # connect to the database
+         try:
+             with connection.cursor() as cursor:
+                 cursor.execute(f"SHOW DATABASES LIKE '{db_name}'")  # check that the database exists
+                 database_exists = cursor.fetchone()
+                 if not database_exists:
+                     print(f"Database <{db_name}>: database does not exist")
+         finally:
+             connection.close()  # the connection must be closed here
+         time.sleep(0.2)
+
+         self.config.update({'database': db_name})  # add/update the database field in config
+         connection = pymysql.connect(**self.config)  # reconnect
+         try:
+             with connection.cursor() as cursor:
+                 # 1. check whether the table exists
+                 sql = f"SHOW TABLES LIKE '{tabel_name}'"
+                 cursor.execute(sql)
+                 if not cursor.fetchone():
+                     print(f'{db_name} -> <{tabel_name}>: table does not exist')
+                     return df
+
+                 # check the requested columns; iterate over a copy because the list is mutated
+                 for col in projection.copy():
+                     sql = ('SELECT 1 FROM information_schema.columns WHERE table_schema = %s AND table_name = %s AND '
+                            'column_name = %s')
+                     cursor.execute(sql, (db_name, tabel_name, col))
+                     if cursor.fetchone() is None:  # drop columns that do not exist
+                         projection.remove(col)
+         except Exception as e:
+             print(e)
+             return df
+         finally:
+             connection.close()  # disconnect
+
+         # before_time = time.time()
+         # read the data
+         self.config.update({'database': db_name})
+         connection = pymysql.connect(**self.config)  # reconnect
+         try:
+             with connection.cursor() as cursor:
+                 cols_exist = []
+                 if not projection:  # if no projection was given, fetch all column names into cols_exist
+                     sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
+                     cursor.execute(sql, (db_name, tabel_name))
+                     columns = cursor.fetchall()
+                     cols_exist = [col['COLUMN_NAME'] for col in columns]
+
+                 if '日期' in projection or '日期' in cols_exist:  # projection contains 日期, or no projection but the table has a 日期 column
+                     sql = f"SELECT * FROM {db_name}.{tabel_name} WHERE 日期 BETWEEN '{start_date}' AND '{end_date}'"
+                 elif projection:  # projection given, without 日期
+                     sql = f"SELECT {', '.join(projection)} FROM {db_name}.{tabel_name}"
+                 else:  # no projection and no 日期 column
+                     sql = f"SELECT * FROM {db_name}.{tabel_name}"
+                 cursor.execute(sql)
+                 rows = cursor.fetchall()  # fetch the result
+                 columns = [desc[0] for desc in cursor.description]
+                 df = pd.DataFrame(rows, columns=columns)
+         except Exception as e:
+             print(f'{e} {db_name} -> <{tabel_name}>: query failed')
+             return df
+         finally:
+             connection.close()
+
+         if len(df) == 0:
+             print(f'database: {db_name}, table: {tabel_name}: query returned no data')
+         # else:
+         #     now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
+         #     cost_time = int(time.time() - before_time)
+         #     if cost_time < 1:
+         #         cost_time = round(time.time() - before_time, 2)
+         #     print(f'{now}mysql ({self.host}) table: {tabel_name}, rows: {len(df)}, took: {cost_time} s')
+         return df
+
+
+ def year_month_day(start_date, end_date):
+     """
+     Use date_range to get every year-month span between start_date and end_date
+     calendar.monthrange: returns the weekday (0-6) of the month's first day and the number of days in the month
+     """
+     # replace the day part so that a date inside the current month still yields a result
+     try:
+         start_date = f'{pd.to_datetime(start_date).year}-{pd.to_datetime(start_date).month}-01'
+     except Exception as e:
+         print(e)
+         return []
+     # pandas date_range with freq='MS' generates month starts
+     date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
+     # convert the format
+     year_months = date_range.strftime('%Y-%m').drop_duplicates().sort_values()
+
+     results = []
+     for year_month in year_months:
+         year = re.findall(r'(\d{4})', year_month)[0]
+         month = re.findall(r'\d{4}-(\d{2})', year_month)[0]
+         s, d = calendar.monthrange(int(year), int(month))
+         results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{d}'})
+
+     return results  # every year-month span between start_date and end_date
+
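+ # e.g. year_month_day('2024-07-15', '2024-08-20') ->
+ # [{'起始日期': '2024-07-01', '结束日期': '2024-07-31'},
+ #  {'起始日期': '2024-08-01', '结束日期': '2024-08-31'}]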
+
+ def download_datas(tabel_name, save_path, start_date):
+     username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
+     print(username, password, host, port)
+     m = QueryDatas(username=username, password=password, host=host, port=port)
+     m.port = port
+     results = year_month_day(start_date=start_date, end_date='today')
+     # print(results)
+     for result in results:
+         start_date = result['起始日期']
+         end_date = result['结束日期']
+         # print(start_date, end_date)
+         df = m.data_to_df(db_name='市场数据2', tabel_name=tabel_name, start_date=start_date, end_date=end_date)
+         if len(df) == 0:
+             continue
+         path = os.path.join(save_path, f'{tabel_name}_{str(start_date)}_{str(end_date)}.csv')
+         df['日期'] = df['日期'].apply(lambda x: re.sub(' .*', '', str(x)))
+         df.to_csv(path, index=False, encoding='utf-8_sig', header=True)
+
+
+ if __name__ == '__main__':
+     # username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
+     # print(username, password, host, port)
+
+     username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
+     qd = QueryDatas(username=username, password=password, host=host, port=port)
+     df = qd.data_to_df(db_name='市场数据2', tabel_name='市场排行_店铺', start_date='2024-08-13', end_date='2024-08-31')
+     print(df)
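
data_to_df builds its SQL by interpolating identifiers and dates into the string. pymysql can bind the date values as parameters instead (identifiers still have to be interpolated, since placeholders only carry values). A hedged sketch of the same date-window query; fetch_window is a hypothetical helper, not part of this package:

    import pymysql

    def fetch_window(config, db_name, table_name, start_date, end_date):
        # config: the dict QueryDatas.__init__ builds, with 'database' already set
        connection = pymysql.connect(**config)
        try:
            with connection.cursor() as cursor:
                sql = f"SELECT * FROM `{db_name}`.`{table_name}` WHERE 日期 BETWEEN %s AND %s"
                cursor.execute(sql, (start_date, end_date))  # pymysql escapes bound values
                return cursor.fetchall()
        finally:
            connection.close()
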
mdbq/other/porxy.py RENAMED (from mdbq/other/xigua_porxy.py, per the RECORD below)
@@ -1,9 +1,6 @@
  import requests
  import kdl
  import warnings
- import getpass
- import platform
- import pathlib
  import os
  import requests
  import datetime
mdbq-0.0.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 0.0.4
+ Version: 0.0.6
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-0.0.6.dist-info/RECORD CHANGED
@@ -1,12 +1,14 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
+ mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/aggregation.py,sha256=HtaUlq6abeyT4V-C3jREbjJGj5L8cTKcDfDm4c4UNpI,60133
+ mdbq/aggregation/aggregation.py,sha256=RHQa2rs4fimRvJzluujErg6I8fn7s9q1-kwC2bPZohE,59439
+ mdbq/aggregation/query_data.py,sha256=avbMc36kCuyTjLOXMzIEIKSc0x227c1t-Ydf0vdkViM,13756
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
- mdbq/bdup/bd.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
+ mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
  mdbq/clean/data_clean.py,sha256=33OmeQFl9AW21P5EOay52W_S8DF96H5oHwCg4fSuBxA,85359
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
- mdbq/company/copysh.py,sha256=U_m6C6dBRGPhuw2prgiFgfnTIiTXxyFJV0K6OE_PY9g,15602
+ mdbq/company/copysh.py,sha256=0exynzeqf85gCBQXAgKycVxddMhr0TjkFcBP_NK0QTA,15609
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/get_myconf.py,sha256=q6Pylsnh4-MsHP9JcX8IdlnGVWikz9hio1HI_qh6Wvs,6171
  mdbq/config/update_conf.py,sha256=YjGjjRchu5BcrmLJkoLjHEF2TbGOmsgCWX4LroXOYWQ,3455
@@ -16,15 +18,16 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
  mdbq/mongo/mongo.py,sha256=hF93-kP2lxK4WY1KCdBBszLQ_I7W0mQQxZ7t4qU2w3A,32930
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
  mdbq/mysql/mysql.py,sha256=H9onFYKSYRjdXghK_29Aj7vgvUgDHexJjIECrdxLbE0,29925
+ mdbq/mysql/s_query.py,sha256=P0QNwJL3ytyN75c8Qny1xfxrOUI4ks-FuRghNsyMWic,7409
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
+ mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
  mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
  mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
- mdbq/other/xigua_porxy.py,sha256=zTOxsdkdDAyGfHWPUm_7WIztjrGExONAwvPzTaC7Rho,5007
  mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
  mdbq/pbix/refresh_all.py,sha256=wulHs4rivf4Mi0Pii2QR5Nk9-TBcvSwnCB_WH9QULKE,5939
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq-0.0.4.dist-info/METADATA,sha256=5CO9nsAMDCP49m1fNFh_UQ-tIyPIgBFdUl-rQd8HlOc,245
- mdbq-0.0.4.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
- mdbq-0.0.4.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-0.0.4.dist-info/RECORD,,
+ mdbq-0.0.6.dist-info/METADATA,sha256=G_RkhGDJMmXpNdrECY4LBT0f6HF9sHpfU-LYxNUzWPk,245
+ mdbq-0.0.6.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
+ mdbq-0.0.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-0.0.6.dist-info/RECORD,,