mdbq 0.1.8__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-0.1.8 → mdbq-0.1.9}/PKG-INFO +1 -1
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/aggregation/aggregation.py +43 -3
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/aggregation/query_data.py +47 -45
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-0.1.8 → mdbq-0.1.9}/setup.py +1 -1
- {mdbq-0.1.8 → mdbq-0.1.9}/README.txt +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/__version__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/bdup/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/bdup/bdup.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/clean/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/clean/data_clean.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/company/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/company/copysh.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/config/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/config/get_myconf.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/config/update_conf.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/dataframe/converter.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/log/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/log/mylogger.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/mongo/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/mongo/mongo.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/mysql/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/mysql/mysql.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/mysql/s_query.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/other/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/other/porxy.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/other/pov_city.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/other/ua_sj.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/pbix/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq/spider/__init__.py +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-0.1.8 → mdbq-0.1.9}/setup.cfg +0 -0
mdbq/aggregation/aggregation.py

@@ -136,6 +136,7 @@ class DatabaseUpdate:
     def __init__(self, path):
         self.path = path  # directory holding the data, i.e. the download folder
         self.datas: list = []  # collection of data waiting to be written to the database
+        self.start_date = '2022-01-01'  # start date for the date table

     def cleaning(self, is_move=True):
         """
@@ -740,6 +741,15 @@ class DatabaseUpdate:
                     os.remove(os.path.join(root, name))
         json_data.dtypes_to_file()  # write the json file containing the dtypes info of the data

+        df = self.date_table()  # build a date table
+        self.datas.append(
+            {
+                '数据库名': '聚合数据',
+                '集合名称': '日期表',
+                '数据主体': df,
+            }
+        )
+
     def upload_df(self, service_databases=[{}]):
         """
         Upload the cleaned df to the database
@@ -896,6 +906,32 @@ class DatabaseUpdate:
            encod = chardet.detect(f1).get('encoding')
        return encod

+    def date_table(self):
+        """
+        Generate the date table used by pbix
+        """
+        yesterday = time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400))
+        dic = pd.date_range(start=self.start_date, end=yesterday)
+        df = pd.DataFrame(dic, columns=['日期'])
+        df.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
+        df.reset_index(inplace=True)
+        # inplace adds the index back into df as a column
+        p = df.pop('index')
+        df['月2'] = df['日期']
+        df['月2'] = df['月2'].dt.month
+        df['日期'] = df['日期'].dt.date  # keep year-month-day, drop hour/minute/second
+        df['年'] = df['日期'].apply(lambda x: str(x).split('-')[0] + '年')
+        df['月'] = df['月2'].apply(lambda x: str(x) + '月')
+        # df.drop('月2', axis=1, inplace=True)
+        mon = df.pop('月2')
+        df['日'] = df['日期'].apply(lambda x: str(x).split('-')[2])
+        df['年月'] = df.apply(lambda x: x['年'] + x['月'], axis=1)
+        df['月日'] = df.apply(lambda x: x['月'] + x['日'] + '日', axis=1)
+        df['第n周'] = df['日期'].apply(lambda x: x.strftime('第%W周'))
+        df['索引'] = p
+        df['月索引'] = mon
+        df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
+        return df

    def update_dtypte():
        """ Update one file's dtype info into the json file """
@@ -991,7 +1027,11 @@ def main():
     d = DatabaseUpdate(path='/Users/xigua/Downloads')
     d.new_unzip(is_move=True)
     d.cleaning(is_move=False)
-    d.upload_df(service_databases=[
+    d.upload_df(service_databases=[
+        # {'home_lx': 'mongodb'},
+        {'home_lx': 'mysql'}
+    ]
+    )
     # print(d.datas)


@@ -999,5 +1039,5 @@ if __name__ == '__main__':
     # username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     # print(username, password, host, port)

-
-    upload()
+    main()
+    # upload()
mdbq/aggregation/query_data.py

@@ -396,57 +396,59 @@ class GroupBy:
                     index=index, header=header, engine=engine, freeze_panes=freeze_panes)


-def data_aggregation():
+def data_aggregation(service_databases=[{}]):
     """
     1. Read data from the database
     2. Aggregate and clean the data
     3. Write everything back to the database: <聚合数据> (no longer exported as files)
     """
-
-
-
-
-
-
+    for service_database in service_databases:
+        for service_name, database in service_database.items():
+            sdq = MysqlDatasQuery(target_service=service_name)  # instantiate the data-processing class
+            sdq.months = 0  # set the data period
+            g = GroupBy()  # instantiate the data-aggregation class
+            # instantiate the database connection
+            username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+            m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            data_dict = [
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '推广数据_宝贝主体报表',
+                    '数据主体': sdq.tg_wxt(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '天猫生意经_宝贝指标',
+                    '数据主体': sdq.syj(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '天猫_店铺来源_日数据',
+                    '数据主体': sdq.dplyd(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '商品id编码表',
+                    '数据主体': sdq.idbm(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '商品id图片对照表',
+                    '数据主体': sdq.sp_picture(),
+                },
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '商品成本',
+                    '数据主体': sdq.sp_cost(),
+                },
+            ]
+            for items in data_dict:
+                db_name, tabel_name, df = items['数据库名'], items['集合名'], items['数据主体']
+                df = g.groupby(df=df, tabel_name=tabel_name, is_maximize=True)  # 2. aggregate the data
+                # g.as_csv(df=df, filename=tabel_name + '.csv')
+                m.df_to_mysql(df=df, db_name=db_name, tabel_name=tabel_name)  # 3. write back to the database


 if __name__ == '__main__':
-    data_aggregation()
+    data_aggregation(service_databases=[{'company': 'mysql'}])