mdbq 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +44 -4
- mdbq/aggregation/query_data.py +108 -47
- mdbq/mysql/data_types.py +245 -0
- mdbq/mysql/s_query.py +18 -0
- {mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/METADATA +1 -1
- {mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/RECORD +8 -7
- {mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/WHEEL +0 -0
- {mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -136,6 +136,7 @@ class DatabaseUpdate:
     def __init__(self, path):
         self.path = path  # 数据所在目录, 即: 下载文件夹
         self.datas: list = []  # 带更新进数据库的数据集合
+        self.start_date = '2022-01-01'  # 日期表的起始日期

     def cleaning(self, is_move=True):
         """
@@ -740,6 +741,15 @@ class DatabaseUpdate:
                     os.remove(os.path.join(root, name))
         json_data.dtypes_to_file()  # 写入 json 文件, 包含数据的 dtypes 信息

+        df = self.date_table()  # 创建一个日期表
+        self.datas.append(
+            {
+                '数据库名': '聚合数据',
+                '集合名称': '日期表',
+                '数据主体': df,
+            }
+        )
+
     def upload_df(self, service_databases=[{}]):
         """
         将清洗后的 df 上传数据库
@@ -896,6 +906,32 @@ class DatabaseUpdate:
         encod = chardet.detect(f1).get('encoding')
         return encod

+    def date_table(self):
+        """
+        生成 pbix使用的日期表
+        """
+        yesterday = time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400))
+        dic = pd.date_range(start=self.start_date, end=yesterday)
+        df = pd.DataFrame(dic, columns=['日期'])
+        df.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
+        df.reset_index(inplace=True)
+        # inplace 添加索引到 df
+        p = df.pop('index')
+        df['月2'] = df['日期']
+        df['月2'] = df['月2'].dt.month
+        df['日期'] = df['日期'].dt.date  # 日期格式保留年月日,去掉时分秒
+        df['年'] = df['日期'].apply(lambda x: str(x).split('-')[0] + '年')
+        df['月'] = df['月2'].apply(lambda x: str(x) + '月')
+        # df.drop('月2', axis=1, inplace=True)
+        mon = df.pop('月2')
+        df['日'] = df['日期'].apply(lambda x: str(x).split('-')[2])
+        df['年月'] = df.apply(lambda x: x['年'] + x['月'], axis=1)
+        df['月日'] = df.apply(lambda x: x['月'] + x['日'] + '日', axis=1)
+        df['第n周'] = df['日期'].apply(lambda x: x.strftime('第%W周'))
+        df['索引'] = p
+        df['月索引'] = mon
+        df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
+        return df

 def update_dtypte():
     """ 更新一个文件的 dtype 信息到 json 文件 """
@@ -906,7 +942,7 @@ def update_dtypte():
         df=df,
         db_name='生意参谋数据2',
         collection_name='店铺来源_月数据',
-        is_file_dtype=
+        is_file_dtype=True,  # 日常需开启文件优先, 正常不要让新文件修改 json 已有的类型
     )
     d.dtypes_to_file()

@@ -991,7 +1027,11 @@ def main():
     d = DatabaseUpdate(path='/Users/xigua/Downloads')
     d.new_unzip(is_move=True)
     d.cleaning(is_move=False)
-    d.upload_df(service_databases=[
+    d.upload_df(service_databases=[
+        # {'home_lx': 'mongodb'},
+        {'home_lx': 'mysql'}
+    ]
+    )
     # print(d.datas)


@@ -999,5 +1039,5 @@ if __name__ == '__main__':
     # username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     # print(username, password, host, port)

-
-    upload()
+    main()
+    # upload()
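For reference, the new DatabaseUpdate.date_table() method builds the calendar table consumed by the pbix reports and queues it for upload as 数据库名 '聚合数据' / 集合名称 '日期表'. Below is a minimal standalone sketch of the same logic, assuming only pandas; a short fixed date range is used so the output is easy to inspect, whereas the real method runs from self.start_date up to yesterday.

# Standalone sketch of the date_table() logic shown in the diff above (pandas only).
import pandas as pd

df = pd.DataFrame(pd.date_range(start='2022-01-01', end='2022-01-03'), columns=['日期'])
df.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
df.reset_index(inplace=True)                 # keep the ascending position, later exposed as 索引
p = df.pop('index')
df['月2'] = df['日期'].dt.month              # numeric month, later exposed as 月索引
df['日期'] = df['日期'].dt.date              # keep only the date part (no time component)
df['年'] = df['日期'].apply(lambda x: str(x).split('-')[0] + '年')
df['月'] = df['月2'].apply(lambda x: str(x) + '月')
mon = df.pop('月2')
df['日'] = df['日期'].apply(lambda x: str(x).split('-')[2])
df['年月'] = df.apply(lambda x: x['年'] + x['月'], axis=1)
df['月日'] = df.apply(lambda x: x['月'] + x['日'] + '日', axis=1)
df['第n周'] = df['日期'].apply(lambda x: x.strftime('第%W周'))
df['索引'] = p
df['月索引'] = mon
df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
print(df)   # columns: 日期, 年, 月, 日, 年月, 月日, 第n周, 索引, 月索引 — one row per day, newest first

In the package itself, the resulting frame is then appended to self.datas so that upload_df() pushes it alongside the other cleaned dataframes.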
mdbq/aggregation/query_data.py
CHANGED
@@ -203,6 +203,7 @@ class GroupBy:
             self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
         else:
             self.output = os.path.join('数据中心/数据库导出')
+        self.data_tgyj = {}

     def groupby(self, df, tabel_name, is_maximize=True):
         """
@@ -250,11 +251,27 @@ class GroupBy:
                 }
             )
             df.insert(loc=1, column='推广渠道', value='万相台无界版')  # df中插入新列
+            df_new = df.groupby(['日期', '商品id'], as_index=False).agg(
+                **{
+                    '花费': ('花费', np.sum),
+                    '成交笔数': ('成交笔数', np.max),
+                    '成交金额': ('成交金额', np.max),
+                    '自然流量曝光量': ('自然流量曝光量', np.max),
+                    '直接成交笔数': ('直接成交笔数', np.max),
+                    '直接成交金额': ('直接成交金额', np.max)
+                }
+            )
+            self.data_tgyj.update(
+                {
+                    tabel_name: df_new,
+                }
+            )
             return df
         elif '宝贝指标' in tabel_name:
+            """ 聚合时不可以加商家编码,编码有些是空白,有些是 0 """
             df.fillna(0, inplace=True)
             df = df[(df['销售额'] != 0) | (df['退款额'] != 0)]
-            df = df.groupby(['日期', '宝贝id', '
+            df = df.groupby(['日期', '宝贝id', '行业类目'], as_index=False).agg(
                 **{'销售额': ('销售额', np.min),
                    '销售量': ('销售量', np.min),
                    '订单数': ('订单数', np.min),
@@ -272,6 +289,11 @@ class GroupBy:
                 else '300+' if x >= 300
                 else '300以下'
             )
+            self.data_tgyj.update(
+                {
+                    tabel_name: df[['日期', '宝贝id', '销售额', '销售量']],
+                }
+            )
             return df
         elif '店铺来源_日数据' in tabel_name:
             return df
@@ -288,6 +310,11 @@ class GroupBy:
             df.drop('行业类目', axis=1, inplace=True)
             df.sort_values('宝贝id', ascending=False, inplace=True)
             df = df[(df['宝贝id'] != '973') & (df['宝贝id'] != '973')]
+            self.data_tgyj.update(
+                {
+                    tabel_name: df[['宝贝id', '商家编码']],
+                }
+            )
             return df
         elif '商品id图片对照表' in tabel_name:
             df['商品id'] = df['商品id'].astype('int64')
@@ -305,15 +332,45 @@ class GroupBy:
             df['商品图片'] = df['商品图片'].apply(lambda x: x if 'http' in x else None)  # 检查是否是 http 链接
             df.dropna(how='all', subset=['商品图片'], axis=0, inplace=True)  # 删除指定列含有空值的行
             df.sort_values(by='商品id', ascending=False, ignore_index=True, inplace=True)  # ascending=False 降序排列
+            self.data_tgyj.update(
+                {
+                    tabel_name: df[['商品id', '商品图片']],
+                }
+            )
             return df
         elif '商品成本' in tabel_name:
             df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
             df.drop_duplicates(subset=['款号'], keep='last', inplace=True, ignore_index=True)
+            self.data_tgyj.update(
+                {
+                    tabel_name: df[['款号', '成本价']],
+                }
+            )
             return df
         else:
             print(f'<{tabel_name}>: Groupby 类尚未配置,数据为空')
             return pd.DataFrame({})
-
+
+    def performance(self):
+        # print(self.data_tgyj)
+        tg, syj, idbm, pic, cost = (
+            self.data_tgyj['推广数据_宝贝主体报表'],
+            self.data_tgyj['天猫生意经_宝贝指标'],
+            self.data_tgyj['商品id编码表'],
+            self.data_tgyj['商品id图片对照表'],
+            self.data_tgyj['商品成本'])  # 这里不要加逗号
+        pic['商品id'] = pic['商品id'].astype(str)
+        df = pd.merge(idbm, pic, how='left', left_on='宝贝id', right_on='商品id')
+        df = df[['宝贝id', '商家编码', '商品图片']]
+        df = pd.merge(df, cost, how='left', left_on='商家编码', right_on='款号')
+        df = df[['宝贝id', '商家编码', '商品图片', '成本价']]
+        df = pd.merge(tg, df, how='left', left_on='商品id', right_on='宝贝id')
+        df.drop(labels='宝贝id', axis=1, inplace=True)
+        df = pd.merge(df, syj, how='left', left_on=['日期', '商品id'], right_on=['日期', '宝贝id'])
+        df.drop(labels='宝贝id', axis=1, inplace=True)
+        df.drop_duplicates(subset=['日期', '商品id', '花费', '销售额'], keep='last', inplace=True, ignore_index=True)
+        return df
+
     def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
                index=False, header=True, st_ascend=None, ascend=None, freq=None):
         """
@@ -396,57 +453,61 @@ class GroupBy:
             index=index, header=header, engine=engine, freeze_panes=freeze_panes)


-def data_aggregation():
+def data_aggregation(service_databases=[{}]):
     """
     1. 从数据库中读取数据
     2. 数据聚合清洗
     3. 统一回传数据库: <聚合数据> (不再导出为文件)
     """
-
-
-
-
-
-
+    for service_database in service_databases:
+        for service_name, database in service_database.items():
+            sdq = MysqlDatasQuery(target_service=service_name)  # 实例化数据处理类
+            sdq.months = 0  # 设置数据周期
+            g = GroupBy()  # 实例化数据聚合类
+            # 实例化数据库连接
+            username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+            m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            data_dict = [
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '推广数据_宝贝主体报表',
+                    '数据主体': sdq.tg_wxt(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '天猫生意经_宝贝指标',
+                    '数据主体': sdq.syj(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '天猫_店铺来源_日数据',
+                    '数据主体': sdq.dplyd(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '商品id编码表',
+                    '数据主体': sdq.idbm(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '商品id图片对照表',
+                    '数据主体': sdq.sp_picture(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '商品成本',
+                    '数据主体': sdq.sp_cost(),
+                },
+            ]
+            for items in data_dict:
+                db_name, tabel_name, df = items['数据库名'], items['集合名'], items['数据主体']
+                df = g.groupby(df=df, tabel_name=tabel_name, is_maximize=True)  # 2. 聚合数据
+                # g.as_csv(df=df, filename=tabel_name + '.csv')
+                m.df_to_mysql(df=df, db_name=db_name, tabel_name=tabel_name)  # 3. 回传数据库
+            res = g.performance()
+            m.df_to_mysql(df=res, db_name='聚合数据', tabel_name='销售盈亏')  # 3. 回传数据库


 if __name__ == '__main__':
-    data_aggregation()
+    data_aggregation(service_databases=[{'company': 'mysql'}])
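The new performance() method stitches the five frames cached in self.data_tgyj into the 销售盈亏 table that data_aggregation() uploads last. A toy illustration of that merge chain, with hypothetical one-row frames standing in for the real query results (the column names follow the diff; the values are made up):

# Toy illustration of the GroupBy.performance() merge chain (sample data is hypothetical).
import pandas as pd

tg   = pd.DataFrame({'日期': ['2024-08-20'], '商品id': ['101'], '花费': [50.0]})                     # 推广数据_宝贝主体报表
syj  = pd.DataFrame({'日期': ['2024-08-20'], '宝贝id': ['101'], '销售额': [300.0], '销售量': [3]})    # 天猫生意经_宝贝指标
idbm = pd.DataFrame({'宝贝id': ['101'], '商家编码': ['A-01']})                                        # 商品id编码表
pic  = pd.DataFrame({'商品id': ['101'], '商品图片': ['http://example.com/101.jpg']})                  # 商品id图片对照表
cost = pd.DataFrame({'款号': ['A-01'], '成本价': [80.0]})                                             # 商品成本

pic['商品id'] = pic['商品id'].astype(str)
df = pd.merge(idbm, pic, how='left', left_on='宝贝id', right_on='商品id')[['宝贝id', '商家编码', '商品图片']]
df = pd.merge(df, cost, how='left', left_on='商家编码', right_on='款号')[['宝贝id', '商家编码', '商品图片', '成本价']]
df = pd.merge(tg, df, how='left', left_on='商品id', right_on='宝贝id').drop(labels='宝贝id', axis=1)
df = pd.merge(df, syj, how='left', left_on=['日期', '商品id'], right_on=['日期', '宝贝id']).drop(labels='宝贝id', axis=1)
df.drop_duplicates(subset=['日期', '商品id', '花费', '销售额'], keep='last', inplace=True, ignore_index=True)
print(df)  # one row per (日期, 商品id) with 花费, 成本价, 销售额 and 销售量 joined together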
mdbq/mysql/data_types.py
ADDED
@@ -0,0 +1,245 @@
+# -*- coding:utf-8 -*-
+import warnings
+import pandas as pd
+import numpy as np
+import chardet
+import zipfile
+
+from numpy import dtype
+from pandas.tseries.holiday import next_monday
+from pyzipper import PyZipFile
+import os
+import platform
+import json
+import pymysql
+from mdbq.mongo import mongo
+from mdbq.mysql import mysql
+from mdbq.mysql import s_query
+from mdbq.config import get_myconf
+from mdbq.dataframe import converter
+import datetime
+import time
+import re
+import shutil
+import getpass
+
+warnings.filterwarnings('ignore')
+"""
+1. 记录 dataframe 或者数据库的列信息(dtypes)
+2. 更新 mysql 中所有数据库的 dtypes 信息到本地 json
+"""
+
+
+class DataTypes:
+    """
+    数据简介: 记录 dataframe 或者数据库的列信息(dtypes),可以记录其信息或者加载相关信息用于入库使用,
+    第一字段为分类(如 dataframe/mysql),第二字段为数据库名,第三字段为集合名,第四段列名及其数据类型
+    """
+    def __init__(self):
+        self.datas = {
+            '_json统计':
+                {
+                    '分类': 0,
+                    '数据库量': 0,
+                    '集合数量': 0,
+                    '字段量': 0,
+                    '数据简介': '记录 dataframe 或者数据库的列信息(dtypes)',
+                }
+        }
+
+    def json_before(self, json_file):
+        """ 本地 json 文件的 dtypes 信息, 初始化更新给 self.datas """
+        if os.path.isfile(json_file):
+            with open(json_file, 'r', encoding='utf-8_sig') as f:
+                json_ = json.load(f)
+                self.datas.update(json_)
+
+    def df_dtypes_to_json(self, db_name, collection_name, path, df=pd.DataFrame(), is_file_dtype=True):
+        if len(df) == 0:
+            return
+        cv = converter.DataFrameConverter()
+        df = cv.convert_df_cols(df=df)  # 清理 dataframe 列名的不合规字符
+        dtypes = df.dtypes.apply(str).to_dict()
+        dtypes = {'dataframe': {db_name: {collection_name: dtypes}}}
+        self.dtypes_to_json(dtypes=dtypes, cl='dataframe', db_name=db_name, collection_name=collection_name, path=path, is_file_dtype=is_file_dtype)
+
+    def dtypes_to_json(self, cl, dtypes, db_name, collection_name, path, is_file_dtype, ):
+        """ 更新 dataframe 的 dtypes 信息到 json 文件 """
+        if not os.path.exists(path):
+            os.makedirs(path)
+        json_file = os.path.join(path, 'df_dtypes.json')
+        if os.path.isfile(json_file):
+            self.json_before(json_file=json_file)  # 更新本地json信息到 self.datas
+
+        if not os.path.isfile(json_file):  # 如果不存在本地 json 文件, 直接返回即可
+            self.datas.update(dtypes)
+            with open(json_file, 'w', encoding='utf-8_sig') as f:
+                json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)
+        else:  # 存在则读取,并更新 df 的 dtypes
+            if cl in self.datas.keys():
+                if db_name in list(self.datas[cl].keys()):  # ['京东数据2', '天猫数据2', '生意参谋数据2', '生意经2']
+                    if collection_name in list(self.datas[cl][db_name].keys()):
+                        if is_file_dtype:  # 旧数据优先
+                            # # 用 dtypes 更新, 允许手动指定 json 文件里面的数据类型
+                            dtypes[cl][db_name][collection_name].update(self.datas[cl][db_name][collection_name])
+                            # 将 dtypes 更新进去,使 self.datas 包含新旧信息
+                            self.datas[cl][db_name][collection_name].update(dtypes[cl][db_name][collection_name])
+                        else:  # 新数据优先
+                            self.datas[cl][db_name][collection_name].update(dtypes[cl][db_name][collection_name])
+                    else:
+                        if is_file_dtype:  # 旧数据优先
+                            dtypes[cl][db_name].update(self.datas[cl][db_name])
+                            self.datas[cl][db_name].update(dtypes[cl][db_name])
+                        else:
+                            self.datas[cl][db_name].update(dtypes[cl][db_name])
+                else:
+                    # dtypes.update(self.datas)  # 可以注释掉, 因为旧数据 self.datas 是空的
+                    self.datas[cl].update(dtypes[cl])
+            else:
+                self.datas.update(dtypes)
+
+        cif = 0  # 分类
+        dbs = 0  # 数据库
+        collections = 0  # 集合
+        cols = 0  # 字段
+        for k, v in self.datas.items():
+            if k == '_json统计':
+                continue  # 不统计头信息
+            cif += 1
+            for t, g in v.items():
+                dbs += 1
+                for d, j in g.items():
+                    collections += 1
+                    for t, p in j.items():
+                        cols += 1
+        tips = {'分类': cif, '数据库量': dbs, '集合数量': collections, '字段量': cols}
+        self.datas['_json统计'].update(tips)
+        with open(json_file, 'w', encoding='utf-8_sig') as f:
+            json.dump(
+                self.datas,
+                f,
+                ensure_ascii=False,  # 默认True,非ASCII字符将被转义。如为False,则非ASCII字符会以\uXXXX输出
+                sort_keys=True,  # 默认为False。如果为True,则字典的输出将按键排序。
+                indent=4,
+            )
+
+    def mysql_dtypes_to_json(self, db_name, tabel_name, path, is_file_dtype=True):
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='home_lx',
+            database='mysql',
+        )
+        sq = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+        name_type = sq.dtypes_to_list(db_name=db_name, tabel_name=tabel_name)
+        if name_type:
+            dtypes = {item['COLUMN_NAME']: item['COLUMN_TYPE'] for item in name_type}
+            dtypes = {'mysql': {db_name: {tabel_name: dtypes}}}
+            self.dtypes_to_json(dtypes=dtypes, cl='mysql', db_name=db_name, collection_name=tabel_name, path=path, is_file_dtype=is_file_dtype)
+        else:
+            print(f'数据库回传数据(name_type)为空')
+
+    def load_dtypes(self, db_name, collection_name, path, cl='dataframe', ):
+        if os.path.isfile(path):
+            self.json_before(json_file=path)  # 更新本地json信息到 self.datas
+        elif os.path.isdir(path):
+            json_file = os.path.join(path, 'df_dtypes.json')
+            if os.path.isfile(json_file):
+                self.json_before(json_file=json_file)
+            else:
+                print(f'不存在的文件: {json_file}')
+                return
+
+        if cl in self.datas.keys():
+            if db_name in list(self.datas[cl].keys()):
+                if collection_name in list(self.datas[cl][db_name].keys()):
+                    return self.datas[cl][db_name][collection_name]
+                else:
+                    print(f'不存在的集合名信息: {collection_name}')
+                    return {}
+            else:
+                print(f'不存在的数据库信息: {db_name}')
+                return {}
+        else:
+            print(f'不存在的数据分类: {cl}')
+            return {}
+
+
+def mysql_all_dtypes(path = '/Users/xigua/数据中心/自动0备份/py/数据更新/support'):
+    """
+    更新 mysql 中所有数据库的 dtypes 信息到本地 json
+    """
+    if not os.path.isdir(path):
+        if platform.system() == 'Darwin':
+            path = os.path.join('/Users', getpass.getuser(), '数据中心/自动0备份/py/数据更新/support')
+        elif platform.system() == 'Windows':
+            path = os.path.join('C:\\同步空间\\BaiduSyncdisk\\自动0备份\\py\\数据更新\\support')
+        else:
+            path = os.path.join('数据中心/数据库导出')
+
+    username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
+    config = {
+        'host': host,
+        'port': port,
+        'user': username,
+        'password': password,
+        'charset': 'utf8mb4',  # utf8mb4 支持存储四字节的UTF-8字符集
+        'cursorclass': pymysql.cursors.DictCursor,
+    }
+
+    connection = pymysql.connect(**config)  # 连接数据库
+    with connection.cursor() as cursor:
+        sql = "SHOW DATABASES;"
+        cursor.execute(sql)
+        db_name_lists = cursor.fetchall()
+        db_name_lists = [item['Database'] for item in db_name_lists]
+    connection.close()
+
+    # db_name_lists = [
+    #     '京东数据2',
+    #     '天猫数据2',
+    #     '市场数据2',
+    #     '生意参谋数据2',
+    #     '生意经2',
+    #     '属性设置2',
+    #     '聚合数据',
+    # ]
+    results = []
+    for db_name in db_name_lists:
+        config.update({'database': db_name})  # 添加更新 config 字段
+        connection = pymysql.connect(**config)  # 连接数据库
+        with connection.cursor() as cursor:
+            sql = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{db_name}';"
+            sql = "SHOW TABLES;"
+            cursor.execute(sql)
+            table_name = cursor.fetchall()
+            data = [{db_name: item['TABLE_NAME']} for item in table_name]
+            results += data
+        connection.close()
+        time.sleep(0.5)
+
+    d = DataTypes()
+    for result in results:
+        for k, v in result.items():
+            d.mysql_dtypes_to_json(db_name=k, tabel_name=v, path=path)
+
+
+
+def main():
+    """ 示例 """
+    path = '/Users/xigua/数据中心/自动0备份/py/数据更新/support'
+    file = '/Users/xigua/Downloads/baobeitrans-2024-08-21.csv'
+    df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
+    d = DataTypes()
+
+    # 更新一个文件的 dtype 信息到 json 文件
+    d.df_dtypes_to_json(path=path, df=df, db_name='生意经2', collection_name='宝贝指标' )
+
+    # 更新一个数据表的 dtype 信息到 json 文件
+    d.mysql_dtypes_to_json(db_name='生意经2', tabel_name='店铺指标', path=path)
+
+    # 从本地文件中读取 dtype 信息
+    dtypes = d.load_dtypes(cl='mysql', db_name='生意经2', collection_name='店铺指标', path=path)
+    print(dtypes)
+
+if __name__ == '__main__':
+    # main()
+    mysql_all_dtypes()
mdbq/mysql/s_query.py
CHANGED
@@ -113,6 +113,24 @@ class QueryDatas:
         connection.close()
         return column_values

+    def dtypes_to_list(self, db_name, tabel_name) -> list:
+        """
+        获取数据表的指定列, 返回列表
+        [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
+        """
+        if self.check_infos(db_name, tabel_name) == False:  # 检查传入的数据库和数据表是否存在
+            return []
+
+        self.config.update({'database': db_name})
+        connection = pymysql.connect(**self.config)  # 重新连接数据库
+        with connection.cursor() as cursor:
+            # 3. 获取数据表的所有列信息
+            sql = 'SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
+            cursor.execute(sql, (db_name, {tabel_name}))
+            column_name_and_type = cursor.fetchall()
+        connection.close()
+        return column_name_and_type
+
     def check_infos(self, db_name, tabel_name) -> bool:
         """ 检查数据库、数据表是否存在 """
         connection = pymysql.connect(**self.config)  # 连接数据库
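dtypes_to_list() is what mysql_dtypes_to_json() in data_types.py calls to read column types back from MySQL. A standalone sketch of the underlying query follows; the connection settings and the database/table names are placeholders, and with pymysql's DictCursor each row comes back as a dict keyed by COLUMN_NAME and COLUMN_TYPE.

# Minimal sketch of the information_schema lookup behind QueryDatas.dtypes_to_list()
# (host/user/password and the db/table names below are placeholders).
import pymysql

config = {
    'host': 'localhost', 'port': 3306, 'user': 'user', 'password': 'password',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}
rows = []
connection = pymysql.connect(**config)
try:
    with connection.cursor() as cursor:
        sql = ('SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns '
               'WHERE table_schema = %s AND table_name = %s')
        cursor.execute(sql, ('生意经2', '店铺指标'))
        rows = cursor.fetchall()   # e.g. [{'COLUMN_NAME': '日期', 'COLUMN_TYPE': 'date'}, ...]
finally:
    connection.close()

print({row['COLUMN_NAME']: row['COLUMN_TYPE'] for row in rows})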
{mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=
-mdbq/aggregation/query_data.py,sha256=
+mdbq/aggregation/aggregation.py,sha256=TkxyIBowTuoNrhVkrgnYNXwNQXCX_xjh7wcYXdP65-E,58496
+mdbq/aggregation/query_data.py,sha256=Xx3x_3XVsHrVM2-wOQQFu8oOBmWpqjxXSUGAyXvKTag,22819
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -19,8 +19,9 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=q0B4wXDSTtXg_vMN7MPh6zdxl6tT68tM74LmdVNQQek,31892
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
+mdbq/mysql/data_types.py,sha256=bn6mVCM_YNy3tEPNYKuTrEe7QKico3PXgAsR2tYcxFg,10362
 mdbq/mysql/mysql.py,sha256=nVrnkHWlcttr3Mx0Bdneb04oTlKtbDL9WrAUY4IEnow,31363
-mdbq/mysql/s_query.py,sha256=
+mdbq/mysql/s_query.py,sha256=6-8O9MHhi3-7n3isJ7t2kTCYL2mSBC_HrxSQmXM5UtI,7901
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
@@ -30,7 +31,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=wulHs4rivf4Mi0Pii2QR5Nk9-TBcvSwnCB_WH9QULKE,5939
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-0.
-mdbq-0.
-mdbq-0.
-mdbq-0.
+mdbq-0.2.0.dist-info/METADATA,sha256=oLHqCxbG3k-an5nlFWxvYFocYySd44rr97CeESSQ-ck,245
+mdbq-0.2.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-0.2.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-0.2.0.dist-info/RECORD,,
{mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/WHEEL
File without changes

{mdbq-0.1.8.dist-info → mdbq-0.2.0.dist-info}/top_level.txt
File without changes