mdbq 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- mdbq/aggregation/aggregation.py +3 -3
- mdbq/aggregation/mysql_types.py +31 -24
- mdbq/aggregation/query_data.py +29 -29
- mdbq/config/products.py +1 -1
- mdbq/mysql/mysql.py +28 -28
- mdbq/mysql/s_query.py +19 -19
- {mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/METADATA +1 -1
- {mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/RECORD +10 -10
- {mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/WHEEL +0 -0
- {mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -685,7 +685,7 @@ class DatabaseUpdate:
                     collection_name=collection_name,
                     is_file_dtype=True,  # 默认本地文件优先: True
                 )
-                m.df_to_mysql(df=df, db_name=db_name,
+                m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
                 df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息

     def new_unzip(self, path=None, is_move=None):

@@ -896,8 +896,8 @@ def upload(path, db_name, collection_name):
             df = df.astype(dtypes)

             d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
-            m.df_to_mysql(df=df, db_name=db_name,
-            nas.df_to_mysql(df=df, db_name=db_name,
+            m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
+            nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
         except Exception as e:
             print(name, e)
         if d.client:
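The calls above now pass the destination table as an explicit table_name keyword. For reference, a minimal usage sketch of the 1.0.3 call shape, assuming placeholder credentials and a throwaway DataFrame (none of these values come from the package):

import pandas as pd
from mdbq.mysql import mysql

# Placeholder credentials; mdbq normally obtains these via mdbq.config.get_myconf
m = mysql.MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)
df = pd.DataFrame({'日期': ['2023-01-01'], '销售额': [100.0]})
# 1.0.3 call shape: the target table is an explicit keyword argument
m.df_to_mysql(df=df, db_name='天猫数据2', table_name='推广数据_宝贝主体报表')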
mdbq/aggregation/mysql_types.py
CHANGED
@@ -64,18 +64,18 @@ class DataTypes:
             json_ = json.load(f)
             self.datas.update(json_)

-    def get_mysql_types(self, cl, dtypes, db_name,
+    def get_mysql_types(self, cl, dtypes, db_name, table_name, is_file_dtype=True):
         """ 更新 mysql 的 types 信息到 json 文件 """
         if cl in self.datas.keys():
             if db_name in list(self.datas[cl].keys()):  # ['京东数据2', '天猫数据2', '生意参谋数据2', '生意经2']
-                if
+                if table_name in list(self.datas[cl][db_name].keys()):
                     if is_file_dtype:  # 旧数据优先
                         # # 用 dtypes 更新, 允许手动指定 json 文件里面的数据类型
-                        dtypes[cl][db_name][
+                        dtypes[cl][db_name][table_name].update(self.datas[cl][db_name][table_name])
                         # 将 dtypes 更新进去,使 self.datas 包含新旧信息
-                        self.datas[cl][db_name][
+                        self.datas[cl][db_name][table_name].update(dtypes[cl][db_name][table_name])
                     else:  # 新数据优先
-                        self.datas[cl][db_name][
+                        self.datas[cl][db_name][table_name].update(dtypes[cl][db_name][table_name])
                 else:
                     if is_file_dtype:  # 旧数据优先
                         dtypes[cl][db_name].update(self.datas[cl][db_name])

@@ -125,30 +125,30 @@ class DataTypes:
         )
         time.sleep(1)

-    def load_dtypes(self, db_name,
+    def load_dtypes(self, db_name, table_name, cl='mysql', ):
         """
         mysql.py 程序从本地文件中读取 dtype 信息
         如果缺失 dtypes 信息,则执行 mysql_all_dtypes 以便更新所有数据库 dtypes 信息到 json 文件
         """
         if cl in self.datas.keys():
             if db_name in list(self.datas[cl].keys()):
-                if
-                    return self.datas[cl][db_name][
+                if table_name in list(self.datas[cl][db_name].keys()):
+                    return self.datas[cl][db_name][table_name]
                 else:
-                    print(f'不存在的集合名信息: {
-                    mysql_all_dtypes()
+                    print(f'不存在的集合名信息: {table_name}, 文件位置: {self.json_file}')
+                    mysql_all_dtypes(db_name=db_name, table_name=table_name)  # 更新一个表的 dtypes
                     return {}
             else:
                 print(f'不存在的数据库信息: {db_name}, 文件位置: {self.json_file}')
-                mysql_all_dtypes()  #
+                mysql_all_dtypes(db_name=db_name)  # 更新一个数据库的 dtypes
                 return {}
         else:
             print(f'不存在的数据分类: {cl}, 文件位置: {self.json_file}')
-            mysql_all_dtypes()  #
+            mysql_all_dtypes()  # 更新所有数据库所有数据表的 dtypes 信息到本地 json
             return {}


-def mysql_all_dtypes(path=None):
+def mysql_all_dtypes(db_name=None, table_name=None, path=None):
     """
     更新笔记本 mysql 中所有数据库的 dtypes 信息到本地 json
     """

@@ -186,18 +186,18 @@ def mysql_all_dtypes(path=None):
     #     '聚合数据',
     # ]
     results = []
-    for
-        config.update({'database':
+    for db_ in db_name_lists:
+        config.update({'database': db_})  # 添加更新 config 字段
         connection = pymysql.connect(**config)  # 连接数据库
         try:
             with connection.cursor() as cursor:
-                sql = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{
+                sql = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{db_}';"
                 sql = "SHOW TABLES;"
                 cursor.execute(sql)
                 table_name = cursor.fetchall()
                 for table in table_name:
                     for k, v in table.items():
-                        results.append({
+                        results.append({db_: v})
         except:
             pass
         finally:

@@ -206,21 +206,28 @@ def mysql_all_dtypes(path=None):

     d = DataTypes()
     for result in results:
-        for
-
-
+        for db_n, table_n in result.items():
+            if db_name and table_name:  # 下载一个指定的数据表
+                if db_name != db_n or table_name != table_n:
+                    continue
+            elif db_name:  # 下载一个数据库的所有数据表
+                if db_name != db_n:
+                    continue
+            # 如果 db_name 和 table_name 都不指定,则下载所有数据库的所有数据表
+            print(f'获取列信息 数据库: < {db_n} >, 数据表: < {table_n} >')
+            # d.mysql_dtypes_to_json(db_name=db_n, table_name=table_n, path=path)
             sq = s_query.QueryDatas(username=username, password=password, host=host, port=port)
             # 获取数据表的指定列, 返回列表
             # [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
-            name_type = sq.dtypes_to_list(db_name=
+            name_type = sq.dtypes_to_list(db_name=db_n, table_name=table_n)
             if name_type:
                 dtypes = {item['COLUMN_NAME']: item['COLUMN_TYPE'] for item in name_type}
-                dtypes = {'mysql': {
+                dtypes = {'mysql': {db_n: {table_n: dtypes}}}
                 d.get_mysql_types(
                     dtypes=dtypes,
                     cl='mysql',
-                    db_name=
-
+                    db_name=db_n,
+                    table_name=table_n,
                     is_file_dtype=True
                 )
             else:
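The block added to mysql_all_dtypes above narrows which (database, table) pairs get refreshed: both arguments select one table, db_name alone selects one database, and neither selects everything. A self-contained sketch of the same narrowing logic, with illustrative names only:

def filter_pairs(pairs, db_name=None, table_name=None):
    # Same narrowing as the diff: both args -> one table;
    # db_name only -> one database; neither -> all pairs.
    for db_n, table_n in pairs:
        if db_name and table_name:
            if db_name != db_n or table_name != table_n:
                continue
        elif db_name:
            if db_name != db_n:
                continue
        yield db_n, table_n

pairs = [('生意经2', '宝贝指标'), ('天猫数据2', '推广数据_宝贝主体报表')]
print(list(filter_pairs(pairs, db_name='生意经2')))  # [('生意经2', '宝贝指标')]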
mdbq/aggregation/query_data.py
CHANGED
@@ -100,7 +100,7 @@ class MysqlDatasQuery:
         }
         df = self.download.data_to_df(
             db_name='天猫数据2',
-
+            table_name='推广数据_宝贝主体报表',
             start_date=start_date,
             end_date=end_date,
             projection=projection,

@@ -119,12 +119,12 @@ class MysqlDatasQuery:
             '订单数': 1,
             '退货量': 1,
             '退款额': 1,
-            '
-            '
+            '退款额(发货后)': 1,
+            '退货量(发货后)': 1,
         }
         df = self.download.data_to_df(
             db_name='生意经2',
-
+            table_name='宝贝指标',
             start_date=start_date,
             end_date=end_date,
             projection=projection,

@@ -135,7 +135,7 @@ class MysqlDatasQuery:
         """ 用生意经日数据制作商品 id 和编码对照表 """
         data_values = self.download.columns_to_list(
             db_name='生意经2',
-
+            table_name='宝贝指标',
             columns_name=['宝贝id', '商家编码', '行业类目'],
         )
         df = pd.DataFrame(data=data_values)

@@ -145,7 +145,7 @@ class MysqlDatasQuery:
         """ 用生意经日数据制作商品 id 和编码对照表 """
         data_values = self.download.columns_to_list(
             db_name='属性设置2',
-
+            table_name='商品素材导出',
             columns_name=['日期', '商品id', '商品白底图', '方版场景图'],
         )
         df = pd.DataFrame(data=data_values)

@@ -166,7 +166,7 @@ class MysqlDatasQuery:
         }
         df = self.download.data_to_df(
             db_name='生意参谋数据2',
-
+            table_name='店铺来源_日数据',
             start_date=start_date,
             end_date=end_date,
             projection=projection,

@@ -177,7 +177,7 @@ class MysqlDatasQuery:
         """ 电商定价 """
         data_values = self.download.columns_to_list(
             db_name='属性设置2',
-
+            table_name='电商定价',
             columns_name=['日期', '款号', '年份季节', '吊牌价', '商家平台', '成本价', '天猫页面价', '天猫中促价'],
         )
         df = pd.DataFrame(data=data_values)

@@ -207,12 +207,12 @@ class GroupBy:
         self.output = os.path.join('数据中心/数据库导出')
         self.data_tgyj = {}  # 推广综合聚合数据表

-    def groupby(self, df,
+    def groupby(self, df, table_name, is_maximize=True):
         """
         self.is_maximize: 是否最大转化数据
         """

-        if '宝贝主体报表' in
+        if '宝贝主体报表' in table_name:
             df.rename(columns={
                 '场景名字': '营销场景',
                 '主体id': '商品id',

@@ -265,11 +265,11 @@ class GroupBy:
             )
             self.data_tgyj.update(
                 {
-
+                    table_name: df_new,
                 }
             )
             return df
-        elif '宝贝指标' in
+        elif '宝贝指标' in table_name:
             """ 聚合时不可以加商家编码,编码有些是空白,有些是 0 """
             df.fillna(0, inplace=True)
             # df = df[(df['销售额'] != 0) | (df['退款额'] != 0)]  # 注释掉, 因为后续使用生意经作为基准合并推广表,需确保所有商品id 齐全

@@ -279,8 +279,8 @@ class GroupBy:
                 '订单数': ('订单数', np.min),
                 '退货量': ('退货量', np.max),
                 '退款额': ('退款额', np.max),
-                '
-                '
+                '退款额(发货后)': ('退款额(发货后)', np.max),
+                '退货量(发货后)': ('退货量(发货后)', np.max),
             }
             )
             df['件均价'] = df.apply(lambda x: x['销售额'] / x['销售量'] if x['销售量'] > 0 else 0, axis=1).round(

@@ -294,13 +294,13 @@ class GroupBy:
             )
             self.data_tgyj.update(
                 {
-
+                    table_name: df[['日期', '宝贝id', '销售额', '销售量', '退款额(发货后)', '退货量(发货后)']],
                 }
             )
             return df
-        elif '店铺来源_日数据' in
+        elif '店铺来源_日数据' in table_name:
             return df
-        elif '商品id编码表' in
+        elif '商品id编码表' in table_name:
             df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
             # df['行业类目'] = df['行业类目'].apply(lambda x: re.sub(' ', '', x))
             try:

@@ -315,11 +315,11 @@ class GroupBy:
             df = df[(df['宝贝id'] != '973') & (df['宝贝id'] != '973')]
             self.data_tgyj.update(
                 {
-
+                    table_name: df[['宝贝id', '商家编码']],
                 }
             )
             return df
-        elif '商品id图片对照表' in
+        elif '商品id图片对照表' in table_name:
             df['商品id'] = df['商品id'].astype('int64')
             df['日期'] = df['日期'].astype('datetime64[ns]')
             df = df[(df['商品白底图'] != '0') | (df['方版场景图'] != '0')]

@@ -337,21 +337,21 @@ class GroupBy:
             df.sort_values(by='商品id', ascending=False, ignore_index=True, inplace=True)  # ascending=False 降序排列
             self.data_tgyj.update(
                 {
-
+                    table_name: df[['商品id', '商品图片']],
                 }
             )
             return df
-        elif '商品成本' in
+        elif '商品成本' in table_name:
             df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
             df.drop_duplicates(subset=['款号'], keep='last', inplace=True, ignore_index=True)
             self.data_tgyj.update(
                 {
-
+                    table_name: df[['款号', '成本价']],
                 }
             )
             return df
         else:
-            print(f'<{
+            print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
             return pd.DataFrame({})

     def performance(self, bb_tg=True):

@@ -512,14 +512,14 @@ def data_aggregation(service_databases=[{}]):
         },
     ]
     for items in data_dict:
-        db_name,
-        df = g.groupby(df=df,
-        # g.as_csv(df=df, filename=
-        m.df_to_mysql(df=df, db_name=db_name,
+        db_name, table_name, df = items['数据库名'], items['集合名'], items['数据主体']
+        df = g.groupby(df=df, table_name=table_name, is_maximize=True)  # 2. 聚合数据
+        # g.as_csv(df=df, filename=table_name + '.csv')
+        m.df_to_mysql(df=df, db_name=db_name, table_name=table_name)  # 3. 回传数据库
     res = g.performance(bb_tg=True)  # 盈亏表,依赖其他表,单独做
-    m.df_to_mysql(df=res, db_name='聚合数据',
+    m.df_to_mysql(df=res, db_name='聚合数据', table_name='_全店商品销售')
     res = g.performance(bb_tg=False)  # 盈亏表,依赖其他表,单独做
-    m.df_to_mysql(df=res, db_name='聚合数据',
+    m.df_to_mysql(df=res, db_name='聚合数据', table_name='_推广商品销售')

     # optimize_data.op_data(service_databases=service_databases, days=3650)  # 立即启动对聚合数据的清理工作
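GroupBy.groupby above branches on substrings of table_name rather than exact names, so a prefixed or suffixed table name still routes to the right aggregation. A stripped-down sketch of that dispatch pattern (the branch labels are illustrative, not from the package):

def route(table_name: str) -> str:
    # Mirrors the `'...' in table_name` checks used by GroupBy.groupby
    if '宝贝主体报表' in table_name:
        return 'promotion'
    elif '宝贝指标' in table_name:
        return 'baobei'
    elif '店铺来源_日数据' in table_name:
        return 'passthrough'
    return 'unconfigured'

print(route('推广数据_宝贝主体报表'))  # promotion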
mdbq/config/products.py
CHANGED
mdbq/mysql/mysql.py
CHANGED
@@ -36,11 +36,11 @@ class MysqlUpload:
         'cursorclass': pymysql.cursors.DictCursor,
     }

-    def df_to_mysql(self, df,
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', drop_duplicates=False):
         """
         将 df 写入数据库
         db_name: 数据库名称
-
+        table_name: 集合/表名称
         drop_duplicates:仅限于聚合数据使用,其他情况不要设置
         """
         cv = converter.DataFrameConverter()

@@ -67,18 +67,18 @@ class MysqlUpload:
         connection = pymysql.connect(**self.config)  # 重新连接数据库
         with connection.cursor() as cursor:
             # 1. 查询表, 不存在则创建一个空表
-            sql = f"SHOW TABLES LIKE '{
+            sql = f"SHOW TABLES LIKE '{table_name}';"  # 有特殊字符不需转义
            cursor.execute(sql)
            if not cursor.fetchone():
-                sql = f"CREATE TABLE IF NOT EXISTS `{
+                sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY)"
                 cursor.execute(sql)
-                print(f'创建 mysql 表: {
+                print(f'创建 mysql 表: {table_name}')

             # 2. 列数据类型转换,将 df 数据类型转换为 mysql 的数据类型
-            dtypes = self.convert_dtypes(df=df, db_name=db_name,
+            dtypes = self.convert_dtypes(df=df, db_name=db_name, table_name=table_name)

             # 有特殊字符不需转义
-            sql = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{db_name}' AND TABLE_NAME = '{
+            sql = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{db_name}' AND TABLE_NAME = '{table_name}';"
             cursor.execute(sql)
             col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
             cols = df.columns.tolist()

@@ -88,16 +88,16 @@ class MysqlUpload:
             for col in col_not_exist:
                 try:
                     # 创建列,需转义
-                    sql = f"ALTER TABLE `{
+                    sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} DEFAULT NULL;"
                     cursor.execute(sql)
                     print(f"添加列: {col}({dtypes[col]})")  # 添加列并指定数据类型

                     # 创建索引
                     if col == '日期':
-                        cursor.execute(f"SHOW INDEXES FROM `{
+                        cursor.execute(f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = '{col}'")
                         result = cursor.fetchone()  # 检查索引是否存在
                         if not result:
-                            cursor.execute(f"CREATE INDEX index_name ON `{
+                            cursor.execute(f"CREATE INDEX index_name ON `{table_name}`(`{col}`)")
                 except:
                     pass
             connection.commit()  # 提交事务

@@ -107,13 +107,13 @@ class MysqlUpload:
             dates = df['日期'].values.tolist()
             start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
             end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
-            sql = f"DELETE FROM `{
+            sql = f"DELETE FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
             cursor.execute(sql)
             connection.commit()

         # 5. 更新插入数据
         now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
-        print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{
+        print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}')
         datas = df.to_dict(orient='records')
         for data in datas:
             try:

@@ -126,17 +126,17 @@ class MysqlUpload:
                 condition = ' AND '.join(condition)  # 构建查询条件
                 # print(condition)

-                sql = f"SELECT {cols} FROM `{
+                sql = f"SELECT {cols} FROM `{table_name}` WHERE {condition}"
                 cursor.execute(sql)
                 result = cursor.fetchall()  # 获取查询结果, 如果有结果返回 list,没有则返回空元组 tuple
                 if not result:  # 数据不存在则插入
-                    sql = f"INSERT INTO `{
+                    sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
                     cursor.execute(sql)
             except:
                 pass
         connection.commit()  # 提交事务

-    def convert_dtypes(self, df, db_name,
+    def convert_dtypes(self, df, db_name, table_name):
         """
         根据本地已经存在的记录着 mysql dtypes 的 json 文件转换 df 的类型为 mysql 专有的数据类型
         允许通过 json 文件指定列的数据类型

@@ -148,7 +148,7 @@ class MysqlUpload:
         # path = set_support.SetSupport(dirname='support').dirname
         d = mysql_types.DataTypes()
         # 从本地文件中读取 dtype 信息
-        dtypes = d.load_dtypes(cl='mysql', db_name=db_name,
+        dtypes = d.load_dtypes(cl='mysql', db_name=db_name, table_name=table_name)
         # 可能会因为没有 json 文件, 返回 None
         if dtypes:
             # 按照文件记录更新 dtypes

@@ -199,7 +199,7 @@ class MysqlUpload:
             return 'mediumtext'

     # @try_except
-    def read_mysql(self,
+    def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', ):
         start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
         end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
         df = pd.DataFrame()

@@ -214,7 +214,7 @@ class MysqlUpload:
                 return df
             else:
                 now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
-                print(f'{now}mysql 正在查询表: {
+                print(f'{now}mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
         except:
             return df
         finally:

@@ -227,25 +227,25 @@ class MysqlUpload:
         try:
             with connection.cursor() as cursor:
                 # 获取指定日期范围的数据
-                sql = f"SELECT * FROM {db_name}.{
+                sql = f"SELECT * FROM {db_name}.{table_name} WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
                 cursor.execute(sql)
                 rows = cursor.fetchall()  # 获取查询结果
                 columns = [desc[0] for desc in cursor.description]
                 df = pd.DataFrame(rows, columns=columns)  # 转为 df
         except Exception as e:
-            print(f'{e} {db_name} -> {
+            print(f'{e} {db_name} -> {table_name} 表不存在')
             return df
         finally:
             connection.close()

         if len(df) == 0:
-            print(f'database: {db_name}, table: {
+            print(f'database: {db_name}, table: {table_name} 查询的数据为空')
         else:
             now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
             cost_time = int(time.time() - before_time)
             if cost_time < 1:
                 cost_time = round(time.time() - before_time, 2)
-            print(f'{now}mysql ({self.host}) 表: {
+            print(f'{now}mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
         return df

     def upload_pandas(self, update_path, db_name, days=None):

@@ -281,7 +281,7 @@ class MysqlUpload:
             df = df[df['日期'] >= start_date]
             if len(df) == 0:
                 continue
-            self.df_to_mysql(df=df, db_name=db_name,
+            self.df_to_mysql(df=df, db_name=db_name, table_name=root_file)
         elif os.path.isfile(f_path):
             if f_path.endswith('.csv') and 'baidu' not in f_path:
                 df = pd.read_csv(f_path, encoding='utf-8_sig', header=0, na_filter=False)

@@ -295,7 +295,7 @@ class MysqlUpload:
                 if len(df) == 0:
                     continue
                 table = f'{os.path.splitext(root_file)[0]}_f'  # 这里定义了文件表会加 _f 后缀
-                self.df_to_mysql(df=df, db_name=db_name,
+                self.df_to_mysql(df=df, db_name=db_name, table_name=table)


 class OptimizeDatas:

@@ -621,7 +621,7 @@ def year_month_day(start_date, end_date):
     return results  # start_date至end_date之间的所有年月日


-def download_datas(
+def download_datas(table_name, save_path, start_date):
     username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
     print(username, password, host, port)
     m = MysqlUpload(username=username, password=password, host=host, port=port)

@@ -632,10 +632,10 @@ def download_datas(tabel_name, save_path, start_date):
         start_date = result['起始日期']
         end_date = result['结束日期']
         # print(start_date, end_date)
-        df = m.read_mysql(db_name='天猫数据1',
+        df = m.read_mysql(db_name='天猫数据1', table_name=table_name, start_date=start_date, end_date=end_date)
         if len(df) == 0:
             continue
-        path = os.path.join(save_path, f'{
+        path = os.path.join(save_path, f'{table_name}_{str(start_date)}_{str(end_date)}.csv')
         df['日期'] = df['日期'].apply(lambda x: re.sub(' .*', '', str(x)))
         df.to_csv(path, index=False, encoding='utf-8_sig', header=True)

@@ -647,7 +647,7 @@ if __name__ == '__main__':
     df = pd.read_csv('/Users/xigua/Downloads/余额查询.csv', encoding='utf-8_sig', header=0, na_filter=False)
     # df = df.to_dict(orient='records')
     m = MysqlUpload(username=username, password=password, host=host, port=port)
-    m.df_to_mysql_new(df=df, db_name='te2- %s t',
+    m.df_to_mysql_new(df=df, db_name='te2- %s t', table_name='测 -sdf @%试 表')
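After this change, read_mysql takes table_name first, with db_name defaulting to '远程数据源'. A hedged usage sketch with placeholder credentials and dates (only the signature comes from the diff):

from mdbq.mysql import mysql

m = mysql.MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)
# 1.0.3 signature per the diff: read_mysql(table_name, start_date, end_date, db_name='远程数据源')
df = m.read_mysql(table_name='推广数据_宝贝主体报表', start_date='2023-01-01',
                  end_date='2023-01-31', db_name='天猫数据1')
print(len(df))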
mdbq/mysql/s_query.py
CHANGED
@@ -34,13 +34,13 @@ class QueryDatas:
         'cursorclass': pymysql.cursors.DictCursor,
     }

-    def data_to_df(self, db_name,
+    def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict=[]):

         start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
         end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
         df = pd.DataFrame()  # 初始化df

-        if self.check_infos(db_name,
+        if self.check_infos(db_name, table_name) == False:
             return df

         self.config.update({'database': db_name})

@@ -49,7 +49,7 @@ class QueryDatas:
         with connection.cursor() as cursor:
             # 3. 获取数据表的所有列信息
             sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
-            cursor.execute(sql, (db_name, {
+            cursor.execute(sql, (db_name, {table_name}))
             columns = cursor.fetchall()
             cols_exist = [col['COLUMN_NAME'] for col in columns]  # 数据表的所有列, 返回 list

@@ -61,17 +61,17 @@ class QueryDatas:
                     columns_in.append(key)  # 提取值为 1 的键并清理不在数据表的键
                 columns_in = ', '.join(columns_in)
                 if '日期' in cols_exist:  # 不论是否指定, 只要数据表有日期,则执行
-                    sql = (f"SELECT {columns_in} FROM {db_name}.{
+                    sql = (f"SELECT {columns_in} FROM {db_name}.{table_name} "
                            f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
                 else:  # 数据表没有日期列时,返回指定列的所有数据
-                    sql = f"SELECT {columns_in} FROM {db_name}.{
+                    sql = f"SELECT {columns_in} FROM {db_name}.{table_name}"
             else:  # 没有指定获取列时
                 if '日期' in cols_exist:  # 但数据表有日期,仍然执行
                     columns_in = ', '.join(cols_exist)
-                    sql = (f"SELECT {columns_in} FROM {db_name}.{
+                    sql = (f"SELECT {columns_in} FROM {db_name}.{table_name} "
                            f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
                 else:  # 没有指定获取列,且数据表也没有日期列,则返回全部列的全部数据
-                    sql = f"SELECT * FROM {db_name}.{
+                    sql = f"SELECT * FROM {db_name}.{table_name}"
             cursor.execute(sql)
             rows = cursor.fetchall()  # 获取查询结果
             columns = [desc[0] for desc in cursor.description]

@@ -83,15 +83,15 @@ class QueryDatas:
         connection.close()

         if len(df) == 0:
-            print(f'database: {db_name}, table: {
+            print(f'database: {db_name}, table: {table_name} 查询的数据为空')
         return df

-    def columns_to_list(self, db_name,
+    def columns_to_list(self, db_name, table_name, columns_name) -> list:
         """
         获取数据表的指定列, 返回列表
         [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
         """
-        if self.check_infos(db_name,
+        if self.check_infos(db_name, table_name) == False:  # 检查传入的数据库和数据表是否存在
             return []

         self.config.update({'database': db_name})

@@ -99,26 +99,26 @@ class QueryDatas:
         with connection.cursor() as cursor:
             # 3. 获取数据表的所有列信息
             sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
-            cursor.execute(sql, (db_name, {
+            cursor.execute(sql, (db_name, {table_name}))
             columns = cursor.fetchall()
             cols_exist = [col['COLUMN_NAME'] for col in columns]  # 数据表的所有列, 返回 list
             columns_name = [item for item in columns_name if item in cols_exist]
             if len(columns_name) == 0:
                 return []
             columns_in = ', '.join(columns_name)
-            sql = (f"SELECT {columns_in} FROM {db_name}.{
+            sql = (f"SELECT {columns_in} FROM {db_name}.{table_name} ")
             cursor.execute(sql)
             column_values = cursor.fetchall()  # 返回指定列,结果是[dict, dict, dict, ...]
             # column_values = [item[column_name] for item in column_values]  # 提取字典的值, 组成列表
             connection.close()
             return column_values

-    def dtypes_to_list(self, db_name,
+    def dtypes_to_list(self, db_name, table_name) -> list:
         """
         获取数据表的指定列, 返回列表
         [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
         """
-        if self.check_infos(db_name,
+        if self.check_infos(db_name, table_name) == False:  # 检查传入的数据库和数据表是否存在
             return []

         self.config.update({'database': db_name})

@@ -126,12 +126,12 @@ class QueryDatas:
         with connection.cursor() as cursor:
             # 3. 获取数据表的所有列信息
             sql = 'SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
-            cursor.execute(sql, (db_name, {
+            cursor.execute(sql, (db_name, {table_name}))
             column_name_and_type = cursor.fetchall()
             connection.close()
             return column_name_and_type

-    def check_infos(self, db_name,
+    def check_infos(self, db_name, table_name) -> bool:
         """ 检查数据库、数据表是否存在 """
         connection = pymysql.connect(**self.config)  # 连接数据库
         try:

@@ -150,10 +150,10 @@ class QueryDatas:
         try:
             with connection.cursor() as cursor:
                 # 2. 查询表是否存在
-                sql = f"SHOW TABLES LIKE '{
+                sql = f"SHOW TABLES LIKE '{table_name}'"
                 cursor.execute(sql)
                 if not cursor.fetchone():
-                    print(f'{db_name} -> <{
+                    print(f'{db_name} -> <{table_name}>: 表不存在')
                     return False
             return True
         except Exception as e:

@@ -168,5 +168,5 @@ if __name__ == '__main__':
     print(username, password, host, port)

     q = QueryDatas(username, password, host, port)
-    res = q.columns_to_list(db_name='视频数据',
+    res = q.columns_to_list(db_name='视频数据', table_name='bilibili视频', columns_name=['视频bv号', '下载进度'])
     print(res)
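All QueryDatas methods now guard on check_infos(db_name, table_name) before querying. A sketch of the reworked data_to_df call shape; the credentials, database, table, and projection below are illustrative (projection keys set to 1 select columns, and only columns that exist in the table are kept):

from mdbq.mysql import s_query

q = s_query.QueryDatas(username='user', password='pass', host='127.0.0.1', port=3306)
df = q.data_to_df(
    db_name='生意经2',
    table_name='宝贝指标',
    start_date='2023-01-01',
    end_date='2023-01-31',
    projection={'宝贝id': 1, '销售额': 1},
)
print(df.head())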
{mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=
+mdbq/aggregation/aggregation.py,sha256=sAF04wcgpp_sVgQyYZcc3vUCP_KVLGaNE0klGKdfwbU,53066
 mdbq/aggregation/df_types.py,sha256=T35KML0sdch8GzIwo7CxSIrt72YVElBeCrsKQx4dX_0,7531
-mdbq/aggregation/mysql_types.py,sha256
+mdbq/aggregation/mysql_types.py,sha256=BbIJlg4s1JXuoVa7fLkRPGEqEcCg7X6HDBzmMrKL7-M,10073
 mdbq/aggregation/optimize_data.py,sha256=jLAWtxPUuhpo4XTVrhKtT4xK3grs7r73ePQfLhxlu1I,779
-mdbq/aggregation/query_data.py,sha256=
+mdbq/aggregation/query_data.py,sha256=DgLHguCJO9iPc1PLuTqC0edegEc7-No2mfylIfFTj38,24531
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41

@@ -14,7 +14,7 @@ mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
 mdbq/company/copysh.py,sha256=i8f8YxmUg-EIzQR-ZHTtnC1A5InwsRtY1_sIsCznVp8,16363
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/config/get_myconf.py,sha256=ffYNOFD5r-cWf7ljZVugYYCS4BrD31j_2xIUnbTZ9iw,5996
-mdbq/config/products.py,sha256=
+mdbq/config/products.py,sha256=9gqXJMsw8KKuD4Xs6krNgcF7AuWDvV7clI6wVi3QjcA,4260
 mdbq/config/set_support.py,sha256=LJLEbUFrv8y-GVskiwOI8A9uRaCEAUa0Yfjugt4yLp0,768
 mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
 mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22

@@ -25,8 +25,8 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=q0B4wXDSTtXg_vMN7MPh6zdxl6tT68tM74LmdVNQQek,31892
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
 mdbq/mysql/data_types_即将删除.py,sha256=sjBBDKr9674LdjM5N_dwyJACdZPbdB8Beli59jGdgnQ,10378
-mdbq/mysql/mysql.py,sha256=
-mdbq/mysql/s_query.py,sha256=
+mdbq/mysql/mysql.py,sha256=pd84IiiQZchMQJ6F328mxYF8BBzW32xJkrTL80H08ug,31914
+mdbq/mysql/s_query.py,sha256=4c24SwbqtnO33o8CgWlTQ_j8sZYl5BRIQkaD9CI-vTY,7901
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961

@@ -36,7 +36,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-1.0.
-mdbq-1.0.
-mdbq-1.0.
-mdbq-1.0.
+mdbq-1.0.3.dist-info/METADATA,sha256=N6Il4o1gI1rRaN2zACQsdju9RuTOkUIitWd3f7ZQfKI,245
+mdbq-1.0.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-1.0.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-1.0.3.dist-info/RECORD,,

{mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/WHEEL
File without changes

{mdbq-1.0.1.dist-info → mdbq-1.0.3.dist-info}/top_level.txt
File without changes