mdbq 1.4.8__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-1.4.8 → mdbq-1.5.0}/PKG-INFO +1 -1
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/aggregation/aggregation.py +31 -9
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/aggregation/query_data.py +60 -3
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/company/copysh.py +1 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.4.8 → mdbq-1.5.0}/setup.py +1 -1
- {mdbq-1.4.8 → mdbq-1.5.0}/README.txt +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/__version__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/clean/data_clean.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/company/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/config/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/config/get_myconf.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/config/products.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/config/set_support.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/dataframe/converter.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/log/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/mysql/mysql.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/mysql/s_query.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/other/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/other/porxy.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.4.8 → mdbq-1.5.0}/setup.cfg +0 -0
@@ -833,6 +833,35 @@ class DatabaseUpdate:
|
|
833
833
|
)
|
834
834
|
# return df
|
835
835
|
|
836
|
+
def other_table(self, service_databases=[{'home_lx': 'mysql'}]):
|
837
|
+
""" 上传 support 文件夹下的 主推商品.csv """
|
838
|
+
support_file = set_support.SetSupport(dirname='support').dirname
|
839
|
+
filename = '主推商品.csv'
|
840
|
+
if not os.path.isfile(os.path.join(support_file, filename)):
|
841
|
+
return
|
842
|
+
df = pd.read_csv(os.path.join(support_file, filename), encoding='utf-8_sig', header=0, na_filter=False)
|
843
|
+
for service_database in service_databases:
|
844
|
+
for service_name, database in service_database.items():
|
845
|
+
username, password, host, port = get_myconf.select_config_values(
|
846
|
+
target_service=service_name,
|
847
|
+
database=database,
|
848
|
+
)
|
849
|
+
m = mysql.MysqlUpload(
|
850
|
+
username=username,
|
851
|
+
password=password,
|
852
|
+
host=host,
|
853
|
+
port=port,
|
854
|
+
)
|
855
|
+
m.df_to_mysql(
|
856
|
+
df=df,
|
857
|
+
db_name='属性设置2',
|
858
|
+
table_name='主推商品',
|
859
|
+
df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
860
|
+
drop_duplicates=True, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
861
|
+
filename=None, # 用来追踪处理进度
|
862
|
+
system_name=service_name, # 用来追踪处理进度
|
863
|
+
)
|
864
|
+
|
836
865
|
|
837
866
|
def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, system_name=None):
|
838
867
|
""" 上传一个文件夹到 mysql 或者 mongodb 数据库 """
|
@@ -1032,12 +1061,5 @@ if __name__ == '__main__':
|
|
1032
1061
|
|
1033
1062
|
# test2()
|
1034
1063
|
|
1035
|
-
|
1036
|
-
|
1037
|
-
username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
|
1038
|
-
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
|
1039
|
-
m.df_to_mysql(df=df, db_name='市场数据2', table_name='搜索排行',
|
1040
|
-
drop_duplicates=True,
|
1041
|
-
# icm_update=['日期', '推广费余额'],
|
1042
|
-
system_name='company',
|
1043
|
-
)
|
1064
|
+
dp = DatabaseUpdate(path='')
|
1065
|
+
dp.other_table(service_databases=[{'company': 'mysql'}])
|
@@ -349,9 +349,60 @@ class GroupBy:
|
|
349
349
|
}
|
350
350
|
)
|
351
351
|
return df
|
352
|
-
elif '店铺来源_日数据' in table_name:
|
352
|
+
elif '店铺来源_日数据' in table_name and '旧版' not in table_name:
|
353
|
+
# 包含三级来源名称和预设索引值列
|
354
|
+
# 截取 从上月1日 至 今天的花费数据, 推广款式按此数据从高到低排序(商品图+排序)
|
355
|
+
df_visitor3 = df.groupby(['日期', '三级来源'], as_index=False).agg({'访客数': 'sum'})
|
356
|
+
df_visitor3 = df_visitor3[~df_visitor3['三级来源'].isin([''])] # 指定列中删除包含空值的行
|
357
|
+
# df_visitor = df_visitor[(df_visitor['日期'] >= f'{year_my}-{last_month.month}-01')]
|
358
|
+
df_visitor3 = df_visitor3.groupby(['三级来源'], as_index=False).agg({'访客数': 'sum'})
|
359
|
+
df_visitor3.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
360
|
+
df_visitor3.reset_index(inplace=True)
|
361
|
+
df_visitor3['index'] = df_visitor3['index'] + 100
|
362
|
+
df_visitor3.rename(columns={'index': '三级访客索引'}, inplace=True)
|
363
|
+
df_visitor3 = df_visitor3[['三级来源', '三级访客索引']]
|
364
|
+
|
365
|
+
# 包含二级来源名称和预设索引值列
|
366
|
+
df_visitor2 = df.groupby(['日期', '二级来源'], as_index=False).agg({'访客数': 'sum'})
|
367
|
+
df_visitor2 = df_visitor2[~df_visitor2['二级来源'].isin([''])] # 指定列中删除包含空值的行
|
368
|
+
# df_visitor2 = df_visitor2[(df_visitor2['日期'] >= f'{year_my}-{last_month.month}-01')]
|
369
|
+
df_visitor2 = df_visitor2.groupby(['二级来源'], as_index=False).agg({'访客数': 'sum'})
|
370
|
+
df_visitor2.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
371
|
+
df_visitor2.reset_index(inplace=True)
|
372
|
+
df_visitor2['index'] = df_visitor2['index'] + 100
|
373
|
+
df_visitor2.rename(columns={'index': '二级访客索引'}, inplace=True)
|
374
|
+
df_visitor2 = df_visitor2[['二级来源', '二级访客索引']]
|
375
|
+
|
376
|
+
df = pd.merge(df, df_visitor2, how='left', left_on='二级来源', right_on='二级来源')
|
377
|
+
df = pd.merge(df, df_visitor3, how='left', left_on='三级来源', right_on='三级来源')
|
353
378
|
return df
|
354
|
-
elif '店铺来源_日数据_旧版' in table_name:
|
379
|
+
elif '天猫_店铺来源_日数据_旧版' in table_name:
|
380
|
+
|
381
|
+
# 包含三级来源名称和预设索引值列
|
382
|
+
# 截取 从上月1日 至 今天的花费数据, 推广款式按此数据从高到低排序(商品图+排序)
|
383
|
+
df_visitor3 = df.groupby(['日期', '三级来源'], as_index=False).agg({'访客数': 'sum'})
|
384
|
+
df_visitor3 = df_visitor3[~df_visitor3['三级来源'].isin([''])] # 指定列中删除包含空值的行
|
385
|
+
# df_visitor = df_visitor[(df_visitor['日期'] >= f'{year_my}-{last_month.month}-01')]
|
386
|
+
df_visitor3 = df_visitor3.groupby(['三级来源'], as_index=False).agg({'访客数': 'sum'})
|
387
|
+
df_visitor3.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
388
|
+
df_visitor3.reset_index(inplace=True)
|
389
|
+
df_visitor3['index'] = df_visitor3['index'] + 100
|
390
|
+
df_visitor3.rename(columns={'index': '三级访客索引'}, inplace=True)
|
391
|
+
df_visitor3 = df_visitor3[['三级来源', '三级访客索引']]
|
392
|
+
|
393
|
+
# 包含二级来源名称和预设索引值列
|
394
|
+
df_visitor2 = df.groupby(['日期', '二级来源'], as_index=False).agg({'访客数': 'sum'})
|
395
|
+
df_visitor2 = df_visitor2[~df_visitor2['二级来源'].isin([''])] # 指定列中删除包含空值的行
|
396
|
+
# df_visitor2 = df_visitor2[(df_visitor2['日期'] >= f'{year_my}-{last_month.month}-01')]
|
397
|
+
df_visitor2 = df_visitor2.groupby(['二级来源'], as_index=False).agg({'访客数': 'sum'})
|
398
|
+
df_visitor2.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
399
|
+
df_visitor2.reset_index(inplace=True)
|
400
|
+
df_visitor2['index'] = df_visitor2['index'] + 100
|
401
|
+
df_visitor2.rename(columns={'index': '二级访客索引'}, inplace=True)
|
402
|
+
df_visitor2 = df_visitor2[['二级来源', '二级访客索引']]
|
403
|
+
|
404
|
+
df = pd.merge(df, df_visitor2, how='left', left_on='二级来源', right_on='二级来源')
|
405
|
+
df = pd.merge(df, df_visitor3, how='left', left_on='三级来源', right_on='三级来源')
|
355
406
|
return df
|
356
407
|
elif '商品id编码表' in table_name:
|
357
408
|
df['宝贝id'] = df['宝贝id'].astype(str)
|
@@ -542,6 +593,12 @@ def data_aggregation_one(service_databases=[{}], months=1, system_name=None,):
|
|
542
593
|
# 从数据库中获取数据, 返回包含 df 数据的字典
|
543
594
|
# 单独处理某一个聚合数据库,在这里修改添加 data_dict 的值
|
544
595
|
data_dict = [
|
596
|
+
{
|
597
|
+
'数据库名': '聚合数据',
|
598
|
+
'集合名': '天猫_店铺来源_日数据_旧版',
|
599
|
+
'唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
|
600
|
+
'数据主体': sdq.dplyd_old(),
|
601
|
+
},
|
545
602
|
{
|
546
603
|
'数据库名': '聚合数据',
|
547
604
|
'集合名': '天猫_店铺来源_日数据',
|
@@ -665,5 +722,5 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
|
|
665
722
|
|
666
723
|
if __name__ == '__main__':
|
667
724
|
# data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1, system_name='home_lx')
|
668
|
-
data_aggregation_one(service_databases=[{'company': 'mysql'}], months=
|
725
|
+
data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1, system_name='company')
|
669
726
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
@@ -348,6 +348,7 @@ def main():
|
|
348
348
|
dp.cleaning(is_move=True) # 公司台式机需要移除自身下载的文件
|
349
349
|
dp.upload_df(service_databases=[{'company': 'mysql'}], system_name='company')
|
350
350
|
dp.date_table(service_databases=[{'company': 'mysql'}]) # 因为日期表不受 days 参数控制,因此单独更新日期表
|
351
|
+
dp.other_table(service_databases=[{'company': 'mysql'}]) # 上传 support 文件夹下的 主推商品.csv
|
351
352
|
|
352
353
|
# 此操作用于修改 .copysh_conf 文件,将 ch_record 改为 false (更新完成)
|
353
354
|
w = update_conf.UpdateConf()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|