mdbq 1.2.5__py3-none-any.whl → 1.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +10 -0
- mdbq/aggregation/query_data.py +25 -18
- mdbq/mysql/mysql.py +3 -3
- mdbq/mysql/s_query.py +3 -0
- {mdbq-1.2.5.dist-info → mdbq-1.2.6.dist-info}/METADATA +1 -1
- {mdbq-1.2.5.dist-info → mdbq-1.2.6.dist-info}/RECORD +8 -8
- {mdbq-1.2.5.dist-info → mdbq-1.2.6.dist-info}/WHEEL +1 -1
- {mdbq-1.2.5.dist-info → mdbq-1.2.6.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -850,6 +850,8 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
|
|
850
850
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
851
851
|
if len(df) == 0:
|
852
852
|
continue
|
853
|
+
# if '新版' not in name:
|
854
|
+
# continue
|
853
855
|
cv = converter.DataFrameConverter()
|
854
856
|
df = cv.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
|
855
857
|
try:
|
@@ -973,5 +975,13 @@ if __name__ == '__main__':
|
|
973
975
|
# target_service='home_lx',
|
974
976
|
# database='mysql'
|
975
977
|
# )
|
978
|
+
# db_name = '生意参谋2'
|
979
|
+
# table_name = '店铺来源_日数据_新版'
|
980
|
+
# upload_dir(
|
981
|
+
# path='/Users/xigua/数据中心/原始文件2/生意参谋/流量来源',
|
982
|
+
# db_name=db_name,
|
983
|
+
# collection_name=table_name,
|
984
|
+
# dbs={'mysql': True, 'mongodb': False},
|
985
|
+
# )
|
976
986
|
|
977
987
|
test2()
|
mdbq/aggregation/query_data.py
CHANGED
@@ -240,6 +240,7 @@ class GroupBy:
|
|
240
240
|
'总成交金额': '成交金额'
|
241
241
|
}, inplace=True)
|
242
242
|
df = df.astype({
|
243
|
+
'商品id': str,
|
243
244
|
'花费': float,
|
244
245
|
'展现量': int,
|
245
246
|
'点击量': int,
|
@@ -263,12 +264,13 @@ class GroupBy:
|
|
263
264
|
)
|
264
265
|
else:
|
265
266
|
df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
|
266
|
-
**{
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
267
|
+
**{
|
268
|
+
'加购量': ('加购量', np.min),
|
269
|
+
'成交笔数': ('成交笔数', np.min),
|
270
|
+
'成交金额': ('成交金额', np.min),
|
271
|
+
'自然流量曝光量': ('自然流量曝光量', np.min),
|
272
|
+
'直接成交笔数': ('直接成交笔数', np.max),
|
273
|
+
'直接成交金额': ('直接成交金额', np.max)
|
272
274
|
}
|
273
275
|
)
|
274
276
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
@@ -276,10 +278,10 @@ class GroupBy:
|
|
276
278
|
**{
|
277
279
|
'花费': ('花费', np.sum),
|
278
280
|
'成交笔数': ('成交笔数', np.max),
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
281
|
+
'成交金额': ('成交金额', np.max),
|
282
|
+
'自然流量曝光量': ('自然流量曝光量', np.max),
|
283
|
+
'直接成交笔数': ('直接成交笔数', np.max),
|
284
|
+
'直接成交金额': ('直接成交金额', np.max)
|
283
285
|
}
|
284
286
|
)
|
285
287
|
self.data_tgyj.update(
|
@@ -290,6 +292,7 @@ class GroupBy:
|
|
290
292
|
return df
|
291
293
|
elif '宝贝指标' in table_name:
|
292
294
|
""" 聚合时不可以加商家编码,编码有些是空白,有些是 0 """
|
295
|
+
df['宝贝id'] = df['宝贝id'].astype(str)
|
293
296
|
df.fillna(0, inplace=True)
|
294
297
|
# df = df[(df['销售额'] != 0) | (df['退款额'] != 0)] # 注释掉, 因为后续使用生意经作为基准合并推广表,需确保所有商品id 齐全
|
295
298
|
df = df.groupby(['日期', '宝贝id', '行业类目'], as_index=False).agg(
|
@@ -320,6 +323,7 @@ class GroupBy:
|
|
320
323
|
elif '店铺来源_日数据' in table_name:
|
321
324
|
return df
|
322
325
|
elif '商品id编码表' in table_name:
|
326
|
+
df['宝贝id'] = df['宝贝id'].astype(str)
|
323
327
|
df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
|
324
328
|
# df['行业类目'] = df['行业类目'].apply(lambda x: re.sub(' ', '', x))
|
325
329
|
try:
|
@@ -359,6 +363,7 @@ class GroupBy:
|
|
359
363
|
table_name: df[['商品id', '商品图片']],
|
360
364
|
}
|
361
365
|
)
|
366
|
+
df['商品id'] = df['商品id'].astype(str)
|
362
367
|
return df
|
363
368
|
elif '商品成本' in table_name:
|
364
369
|
df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
|
@@ -373,7 +378,7 @@ class GroupBy:
|
|
373
378
|
print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
|
374
379
|
return pd.DataFrame({})
|
375
380
|
|
376
|
-
@try_except
|
381
|
+
# @try_except
|
377
382
|
def performance(self, bb_tg=True):
|
378
383
|
# print(self.data_tgyj)
|
379
384
|
tg, syj, idbm, pic, cost = (
|
@@ -390,13 +395,14 @@ class GroupBy:
|
|
390
395
|
df = pd.merge(tg, df, how='left', left_on='商品id', right_on='宝贝id')
|
391
396
|
df.drop(labels='宝贝id', axis=1, inplace=True)
|
392
397
|
if bb_tg is True:
|
393
|
-
|
398
|
+
# 生意经合并推广表,完整的数据表,包含全店所有推广、销售数据
|
394
399
|
df = pd.merge(syj, df, how='left', left_on=['日期', '宝贝id'], right_on=['日期', '商品id'])
|
395
400
|
else:
|
396
401
|
# 推广表合并生意经 , 以推广数据为基准,销售数据不齐全
|
397
402
|
df = pd.merge(df, syj, how='left', left_on=['日期', '商品id'], right_on=['日期', '宝贝id'])
|
398
403
|
df.drop(labels='宝贝id', axis=1, inplace=True)
|
399
404
|
df.drop_duplicates(subset=['日期', '商品id', '花费', '销售额'], keep='last', inplace=True, ignore_index=True)
|
405
|
+
df['成本价'] = df['成本价'].astype('float64')
|
400
406
|
df['商品成本'] = df.apply(lambda x: (x['成本价'] + x['销售额']/x['销售量'] * 0.11 + 6) * x['销售量'] if x['销售量'] > 0 else 0, axis=1)
|
401
407
|
df['商品毛利'] = df.apply(lambda x: x['销售额'] - x['商品成本'], axis=1)
|
402
408
|
df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
|
@@ -493,12 +499,13 @@ def data_aggregation(service_databases=[{}]):
|
|
493
499
|
for service_database in service_databases:
|
494
500
|
for service_name, database in service_database.items():
|
495
501
|
sdq = MysqlDatasQuery(target_service=service_name) # 实例化数据处理类
|
496
|
-
sdq.months =
|
502
|
+
sdq.months = 0 # 设置数据周期, 1 表示近 2 个月
|
497
503
|
g = GroupBy() # 实例化数据聚合类
|
498
504
|
# 实例化数据库连接
|
499
505
|
username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
|
500
506
|
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
|
501
507
|
|
508
|
+
# 从数据库中获取数据, 返回包含 df 数据的字典
|
502
509
|
data_dict = [
|
503
510
|
{
|
504
511
|
'数据库名': '聚合数据',
|
@@ -531,19 +538,19 @@ def data_aggregation(service_databases=[{}]):
|
|
531
538
|
'数据主体': sdq.sp_cost(),
|
532
539
|
},
|
533
540
|
]
|
534
|
-
for items in data_dict:
|
541
|
+
for items in data_dict: # 遍历返回结果
|
535
542
|
db_name, table_name, df = items['数据库名'], items['集合名'], items['数据主体']
|
536
543
|
df = g.groupby(df=df, table_name=table_name, is_maximize=True) # 2. 聚合数据
|
537
544
|
# g.as_csv(df=df, filename=table_name + '.csv')
|
538
|
-
m.df_to_mysql(df=df, db_name=db_name, table_name=table_name) # 3. 回传数据库
|
545
|
+
m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, drop_dup=True) # 3. 回传数据库
|
539
546
|
res = g.performance(bb_tg=True) # 盈亏表,依赖其他表,单独做
|
540
|
-
m.df_to_mysql(df=res, db_name='聚合数据', table_name='_全店商品销售')
|
547
|
+
m.df_to_mysql(df=res, db_name='聚合数据', table_name='_全店商品销售', drop_dup=True)
|
541
548
|
res = g.performance(bb_tg=False) # 盈亏表,依赖其他表,单独做
|
542
|
-
m.df_to_mysql(df=res, db_name='聚合数据', table_name='_推广商品销售')
|
549
|
+
m.df_to_mysql(df=res, db_name='聚合数据', table_name='_推广商品销售', drop_dup=True)
|
543
550
|
|
544
551
|
# optimize_data.op_data(service_databases=service_databases, days=3650) # 立即启动对聚合数据的清理工作
|
545
552
|
|
546
553
|
|
547
554
|
if __name__ == '__main__':
|
548
|
-
data_aggregation(service_databases=[{'
|
555
|
+
data_aggregation(service_databases=[{'home_lx': 'mysql'}])
|
549
556
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
mdbq/mysql/mysql.py
CHANGED
@@ -65,7 +65,7 @@ class MysqlUpload:
|
|
65
65
|
将 df 写入数据库
|
66
66
|
db_name: 数据库名称
|
67
67
|
table_name: 集合/表名称
|
68
|
-
df_sql: 使用 df.to_sql
|
68
|
+
df_sql: 使用 df.to_sql 函数上传整个表, 不会排重
|
69
69
|
drop_duplicates:仅限于聚合数据使用,其他情况不要设置此参数
|
70
70
|
drop_dup: 值为 True 时检查重复数据再插入,反之直接上传
|
71
71
|
filename: 传这个参数是方便定位产生错误的文件
|
@@ -153,7 +153,7 @@ class MysqlUpload:
|
|
153
153
|
|
154
154
|
if df_sql:
|
155
155
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
|
156
|
-
print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
|
156
|
+
print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
|
157
157
|
engine = create_engine(
|
158
158
|
f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
|
159
159
|
df.to_sql(
|
@@ -186,7 +186,7 @@ class MysqlUpload:
|
|
186
186
|
|
187
187
|
# 5. 更新插入数据
|
188
188
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
|
189
|
-
print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
|
189
|
+
print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
|
190
190
|
|
191
191
|
datas = df.to_dict(orient='records')
|
192
192
|
for data in datas:
|
mdbq/mysql/s_query.py
CHANGED
@@ -12,6 +12,7 @@ from sqlalchemy import create_engine
|
|
12
12
|
import os
|
13
13
|
import calendar
|
14
14
|
from mdbq.config import get_myconf
|
15
|
+
from mdbq.dataframe import converter
|
15
16
|
|
16
17
|
warnings.filterwarnings('ignore')
|
17
18
|
"""
|
@@ -84,6 +85,8 @@ class QueryDatas:
|
|
84
85
|
|
85
86
|
if len(df) == 0:
|
86
87
|
print(f'database: {db_name}, table: {table_name} 查询的数据为空')
|
88
|
+
cv = converter.DataFrameConverter()
|
89
|
+
df = cv.convert_df_cols(df)
|
87
90
|
return df
|
88
91
|
|
89
92
|
def columns_to_list(self, db_name, table_name, columns_name) -> list:
|
@@ -1,11 +1,11 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=mBgIY7afloW8H5qoBy56vCabIQRxVvAhrRZgGbZUxFQ,55791
|
5
5
|
mdbq/aggregation/df_types.py,sha256=rHLIgv82PJSFmDvXkZyOJAffXkFyyMyFO23w9tUt8EQ,7525
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=umVixmbFZM63k-QhVWLvOuhcAde4P_oDKbdo8ry2O9w,10633
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=jLAWtxPUuhpo4XTVrhKtT4xK3grs7r73ePQfLhxlu1I,779
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=fg_9OdNSwHbo9vhK1pAKOazHFHZfE9_rBxRyQIWJX9U,25694
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
@@ -24,8 +24,8 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
24
24
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
25
25
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
26
26
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
27
|
-
mdbq/mysql/mysql.py,sha256=
|
28
|
-
mdbq/mysql/s_query.py,sha256=
|
27
|
+
mdbq/mysql/mysql.py,sha256=KvUQflP5sYOECTHOs2Fs9ABcQvgPCbBnAX2ZlE3JjgY,37544
|
28
|
+
mdbq/mysql/s_query.py,sha256=a33aYhW6gAnspIZfQ7l23ePln9-MD1f_ukypr5M0jd8,8018
|
29
29
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
30
30
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
31
31
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
35
35
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
36
36
|
mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
38
|
-
mdbq-1.2.
|
39
|
-
mdbq-1.2.
|
40
|
-
mdbq-1.2.
|
41
|
-
mdbq-1.2.
|
38
|
+
mdbq-1.2.6.dist-info/METADATA,sha256=_s1z5j_Q_dSi4lrw46NcpwMlgz5TkZnndOmWp4290Mk,245
|
39
|
+
mdbq-1.2.6.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
40
|
+
mdbq-1.2.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
41
|
+
mdbq-1.2.6.dist-info/RECORD,,
|
File without changes
|