mdbq 1.5.7__tar.gz → 1.5.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-1.5.7 → mdbq-1.5.8}/PKG-INFO +1 -1
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/aggregation/query_data.py +294 -12
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/mysql/s_query.py +37 -34
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq.egg-info/SOURCES.txt +0 -1
- {mdbq-1.5.7 → mdbq-1.5.8}/setup.py +1 -1
- mdbq-1.5.7/mdbq/config/get_myconf_/345/206/262/347/252/201/346/226/207/344/273/266_Administrator_20240901101505.py +0 -131
- {mdbq-1.5.7 → mdbq-1.5.8}/README.txt +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/__version__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/aggregation/aggregation.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/clean/data_clean.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/company/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/company/copysh.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/config/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/config/get_myconf.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/config/products.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/config/set_support.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/dataframe/converter.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/log/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/mysql/mysql.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/other/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/other/porxy.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.5.7 → mdbq-1.5.8}/setup.cfg +0 -0
@@ -138,6 +138,57 @@ class MysqlDatasQuery:
|
|
138
138
|
)
|
139
139
|
return df
|
140
140
|
|
141
|
+
def tg_rqbb(self):
|
142
|
+
start_date, end_date = self.months_data(num=self.months)
|
143
|
+
projection = {
|
144
|
+
'日期': 1,
|
145
|
+
'场景名字': 1,
|
146
|
+
'主体id': 1,
|
147
|
+
'花费': 1,
|
148
|
+
'展现量': 1,
|
149
|
+
'点击量': 1,
|
150
|
+
'总购物车数': 1,
|
151
|
+
'总成交笔数': 1,
|
152
|
+
'总成交金额': 1,
|
153
|
+
'直接成交笔数': 1,
|
154
|
+
'直接成交金额': 1,
|
155
|
+
'人群名字': 1,
|
156
|
+
}
|
157
|
+
df = self.download.data_to_df(
|
158
|
+
db_name='推广数据2',
|
159
|
+
table_name='人群报表',
|
160
|
+
start_date=start_date,
|
161
|
+
end_date=end_date,
|
162
|
+
projection=projection,
|
163
|
+
)
|
164
|
+
return df
|
165
|
+
|
166
|
+
def tg_gjc(self):
|
167
|
+
start_date, end_date = self.months_data(num=self.months)
|
168
|
+
projection = {
|
169
|
+
'日期': 1,
|
170
|
+
'场景名字': 1,
|
171
|
+
'宝贝id': 1,
|
172
|
+
'词类型': 1,
|
173
|
+
'词名字/词包名字': 1,
|
174
|
+
'花费': 1,
|
175
|
+
'展现量': 1,
|
176
|
+
'点击量': 1,
|
177
|
+
'总购物车数': 1,
|
178
|
+
'总成交笔数': 1,
|
179
|
+
'总成交金额': 1,
|
180
|
+
'直接成交笔数': 1,
|
181
|
+
'直接成交金额': 1,
|
182
|
+
}
|
183
|
+
df = self.download.data_to_df(
|
184
|
+
db_name='推广数据2',
|
185
|
+
table_name='关键词报表',
|
186
|
+
start_date=start_date,
|
187
|
+
end_date=end_date,
|
188
|
+
projection=projection,
|
189
|
+
)
|
190
|
+
return df
|
191
|
+
|
141
192
|
def idbm(self):
|
142
193
|
""" 用生意经日数据制作商品 id 和编码对照表 """
|
143
194
|
data_values = self.download.columns_to_list(
|
@@ -213,6 +264,55 @@ class MysqlDatasQuery:
|
|
213
264
|
df = pd.DataFrame(data=data_values)
|
214
265
|
return df
|
215
266
|
|
267
|
+
def jdjzt(self):
|
268
|
+
start_date, end_date = self.months_data(num=self.months)
|
269
|
+
projection = {
|
270
|
+
'日期': 1,
|
271
|
+
'产品线': 1,
|
272
|
+
'触发sku id': 1,
|
273
|
+
'跟单sku id': 1,
|
274
|
+
'花费': 1,
|
275
|
+
'展现数': 1,
|
276
|
+
'点击数': 1,
|
277
|
+
'直接订单行': 1,
|
278
|
+
'直接订单金额': 1,
|
279
|
+
'总订单行': 1,
|
280
|
+
'总订单金额': 1,
|
281
|
+
'直接加购数': 1,
|
282
|
+
'总加购数': 1,
|
283
|
+
'spu id': 1,
|
284
|
+
}
|
285
|
+
df = self.download.data_to_df(
|
286
|
+
db_name='京东数据2',
|
287
|
+
table_name='推广数据_京准通',
|
288
|
+
start_date=start_date,
|
289
|
+
end_date=end_date,
|
290
|
+
projection=projection,
|
291
|
+
)
|
292
|
+
return df
|
293
|
+
|
294
|
+
def sku_sales(self):
|
295
|
+
start_date, end_date = self.months_data(num=self.months)
|
296
|
+
projection = {
|
297
|
+
'日期': 1,
|
298
|
+
'商品id': 1,
|
299
|
+
'货号': 1,
|
300
|
+
'成交单量': 1,
|
301
|
+
'成交金额': 1,
|
302
|
+
'访客数': 1,
|
303
|
+
'成交客户数': 1,
|
304
|
+
'加购商品件数': 1,
|
305
|
+
'加购人数': 1,
|
306
|
+
}
|
307
|
+
df = self.download.data_to_df(
|
308
|
+
db_name='京东数据2',
|
309
|
+
table_name='sku_商品明细',
|
310
|
+
start_date=start_date,
|
311
|
+
end_date=end_date,
|
312
|
+
projection=projection,
|
313
|
+
)
|
314
|
+
return df
|
315
|
+
|
216
316
|
@staticmethod
|
217
317
|
def months_data(num=0, end_date=None):
|
218
318
|
""" 读取近 num 个月的数据, 0 表示读取当月的数据 """
|
@@ -236,6 +336,7 @@ class GroupBy:
|
|
236
336
|
else:
|
237
337
|
self.output = os.path.join('数据中心/数据库导出')
|
238
338
|
self.data_tgyj = {} # 推广综合聚合数据表
|
339
|
+
self.data_jdtg = {} # 京东推广数据,聚合数据
|
239
340
|
|
240
341
|
@staticmethod
|
241
342
|
def try_except(func): # 在类内部定义一个异常处理方法
|
@@ -319,6 +420,88 @@ class GroupBy:
|
|
319
420
|
}
|
320
421
|
)
|
321
422
|
return df
|
423
|
+
elif '人群报表' in table_name:
|
424
|
+
df.rename(columns={
|
425
|
+
'场景名字': '营销场景',
|
426
|
+
'主体id': '商品id',
|
427
|
+
'总购物车数': '加购量',
|
428
|
+
'总成交笔数': '成交笔数',
|
429
|
+
'总成交金额': '成交金额'
|
430
|
+
}, inplace=True)
|
431
|
+
df = df.astype({
|
432
|
+
'商品id': str,
|
433
|
+
'花费': float,
|
434
|
+
'展现量': int,
|
435
|
+
'点击量': int,
|
436
|
+
'加购量': int,
|
437
|
+
'成交笔数': int,
|
438
|
+
'成交金额': float,
|
439
|
+
'直接成交笔数': int,
|
440
|
+
'直接成交金额': float,
|
441
|
+
}, errors='raise')
|
442
|
+
df.fillna(0, inplace=True)
|
443
|
+
if is_maximize:
|
444
|
+
df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量', '人群名字'], as_index=False).agg(
|
445
|
+
**{'加购量': ('加购量', np.max),
|
446
|
+
'成交笔数': ('成交笔数', np.max),
|
447
|
+
'成交金额': ('成交金额', np.max),
|
448
|
+
'直接成交笔数': ('直接成交笔数', np.max),
|
449
|
+
'直接成交金额': ('直接成交金额', np.max)
|
450
|
+
}
|
451
|
+
)
|
452
|
+
else:
|
453
|
+
df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量', '人群名字'], as_index=False).agg(
|
454
|
+
**{
|
455
|
+
'加购量': ('加购量', np.min),
|
456
|
+
'成交笔数': ('成交笔数', np.min),
|
457
|
+
'成交金额': ('成交金额', np.min),
|
458
|
+
'直接成交笔数': ('直接成交笔数', np.max),
|
459
|
+
'直接成交金额': ('直接成交金额', np.max)
|
460
|
+
}
|
461
|
+
)
|
462
|
+
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
463
|
+
return df
|
464
|
+
elif '关键词报表' in table_name:
|
465
|
+
df.rename(columns={
|
466
|
+
'场景名字': '营销场景',
|
467
|
+
'宝贝id': '商品id',
|
468
|
+
'总购物车数': '加购量',
|
469
|
+
'总成交笔数': '成交笔数',
|
470
|
+
'总成交金额': '成交金额'
|
471
|
+
}, inplace=True)
|
472
|
+
df = df.astype({
|
473
|
+
'商品id': str,
|
474
|
+
'花费': float,
|
475
|
+
'展现量': int,
|
476
|
+
'点击量': int,
|
477
|
+
'加购量': int,
|
478
|
+
'成交笔数': int,
|
479
|
+
'成交金额': float,
|
480
|
+
'直接成交笔数': int,
|
481
|
+
'直接成交金额': float,
|
482
|
+
}, errors='raise')
|
483
|
+
df.fillna(0, inplace=True)
|
484
|
+
if is_maximize:
|
485
|
+
df = df.groupby(['日期', '营销场景', '商品id', '词类型', '词名字/词包名字', '花费', '展现量', '点击量'], as_index=False).agg(
|
486
|
+
**{'加购量': ('加购量', np.max),
|
487
|
+
'成交笔数': ('成交笔数', np.max),
|
488
|
+
'成交金额': ('成交金额', np.max),
|
489
|
+
'直接成交笔数': ('直接成交笔数', np.max),
|
490
|
+
'直接成交金额': ('直接成交金额', np.max)
|
491
|
+
}
|
492
|
+
)
|
493
|
+
else:
|
494
|
+
df = df.groupby(['日期', '营销场景', '商品id', '词类型', '词名字/词包名字', '花费', '展现量', '点击量'], as_index=False).agg(
|
495
|
+
**{
|
496
|
+
'加购量': ('加购量', np.min),
|
497
|
+
'成交笔数': ('成交笔数', np.min),
|
498
|
+
'成交金额': ('成交金额', np.min),
|
499
|
+
'直接成交笔数': ('直接成交笔数', np.max),
|
500
|
+
'直接成交金额': ('直接成交金额', np.max)
|
501
|
+
}
|
502
|
+
)
|
503
|
+
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
504
|
+
return df
|
322
505
|
elif '宝贝指标' in table_name:
|
323
506
|
""" 聚合时不可以加商家编码,编码有些是空白,有些是 0 """
|
324
507
|
df['宝贝id'] = df['宝贝id'].astype(str)
|
@@ -456,6 +639,38 @@ class GroupBy:
|
|
456
639
|
}
|
457
640
|
)
|
458
641
|
return df
|
642
|
+
elif '京准通' in table_name:
|
643
|
+
df = df.groupby(['日期', '产品线', '触发sku id', '跟单sku id', 'spu id', '花费', '展现数', '点击数'], as_index=False).agg(
|
644
|
+
**{'直接订单行': ('直接订单行', np.max),
|
645
|
+
'直接订单金额': ('直接订单金额', np.max),
|
646
|
+
'总订单行': ('总订单行', np.max),
|
647
|
+
'总订单金额': ('总订单金额', np.max),
|
648
|
+
'直接加购数': ('直接加购数', np.max),
|
649
|
+
'总加购数': ('总加购数', np.max),
|
650
|
+
}
|
651
|
+
)
|
652
|
+
df = df[df['花费'] > 0]
|
653
|
+
self.data_jdtg.update(
|
654
|
+
{
|
655
|
+
table_name: df[['日期', '产品线', '触发sku id', '跟单sku id', '花费']],
|
656
|
+
}
|
657
|
+
)
|
658
|
+
return df
|
659
|
+
elif '京东_sku_商品明细' in table_name:
|
660
|
+
df = df[df['商品id'] != '合计']
|
661
|
+
df = df.groupby(['日期', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
|
662
|
+
as_index=False).agg(
|
663
|
+
**{
|
664
|
+
'成交单量': ('成交单量', np.max),
|
665
|
+
'成交金额': ('成交金额', np.max),
|
666
|
+
}
|
667
|
+
)
|
668
|
+
self.data_jdtg.update(
|
669
|
+
{
|
670
|
+
table_name: df,
|
671
|
+
}
|
672
|
+
)
|
673
|
+
return df
|
459
674
|
else:
|
460
675
|
print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
|
461
676
|
return pd.DataFrame({})
|
@@ -487,13 +702,47 @@ class GroupBy:
|
|
487
702
|
df = pd.merge(df, syj, how='left', left_on=['日期', '商品id'], right_on=['日期', '宝贝id'])
|
488
703
|
df.drop(labels='宝贝id', axis=1, inplace=True)
|
489
704
|
df.drop_duplicates(subset=['日期', '商品id', '花费', '销售额'], keep='last', inplace=True, ignore_index=True)
|
705
|
+
df.fillna(0, inplace=True)
|
490
706
|
df['成本价'] = df['成本价'].astype('float64')
|
707
|
+
df['销售额'] = df['销售额'].astype('float64')
|
708
|
+
df['销售量'] = df['销售量'].astype('int64')
|
491
709
|
df['商品成本'] = df.apply(lambda x: (x['成本价'] + x['销售额']/x['销售量'] * 0.11 + 6) * x['销售量'] if x['销售量'] > 0 else 0, axis=1)
|
492
710
|
df['商品毛利'] = df.apply(lambda x: x['销售额'] - x['商品成本'], axis=1)
|
493
711
|
df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
|
494
712
|
df['盈亏'] = df.apply(lambda x: x['商品毛利'] - x['花费'], axis=1)
|
495
713
|
return df
|
496
714
|
|
715
|
+
def performance_jd(self, jd_tg=True):
|
716
|
+
jdtg, sku_sales = self.data_jdtg['京东_京准通'], self.data_jdtg['京东_sku_商品明细']
|
717
|
+
jdtg = jdtg.groupby(['日期', '跟单sku id'],
|
718
|
+
as_index=False).agg(
|
719
|
+
**{
|
720
|
+
'花费': ('花费', np.sum)
|
721
|
+
}
|
722
|
+
)
|
723
|
+
cost = self.data_tgyj['商品成本']
|
724
|
+
df = pd.merge(sku_sales, cost, how='left', left_on='货号', right_on='款号')
|
725
|
+
df = df[['日期', '商品id', '货号', '成交单量', '成交金额', '成本价']]
|
726
|
+
if jd_tg is True:
|
727
|
+
# 完整的数据表,包含全店所有推广、销售数据
|
728
|
+
df = pd.merge(df, jdtg, how='left', left_on=['日期', '商品id'], right_on=['日期', '跟单sku id']) # df 合并推广表
|
729
|
+
else:
|
730
|
+
df = pd.merge(jdtg, df, how='left', left_on=['日期', '跟单sku id'], right_on=['日期', '商品id']) # 推广表合并 df
|
731
|
+
df = df[['日期', '跟单sku id', '花费', '货号', '成交单量', '成交金额', '成本价']]
|
732
|
+
df.fillna(0, inplace=True)
|
733
|
+
df['成本价'] = df['成本价'].astype('float64')
|
734
|
+
df['成交金额'] = df['成交金额'].astype('float64')
|
735
|
+
df['花费'] = df['花费'].astype('float64')
|
736
|
+
df['成交单量'] = df['成交单量'].astype('int64')
|
737
|
+
df['商品成本'] = df.apply(
|
738
|
+
lambda x: (x['成本价'] + x['成交金额'] / x['成交单量'] * 0.11 + 6) * x['成交单量'] if x['成交单量'] > 0 else 0,
|
739
|
+
axis=1)
|
740
|
+
df['商品毛利'] = df.apply(lambda x: x['成交金额'] - x['商品成本'], axis=1)
|
741
|
+
df['毛利率'] = df.apply(
|
742
|
+
lambda x: round((x['成交金额'] - x['商品成本']) / x['成交金额'], 4) if x['成交金额'] > 0 else 0, axis=1)
|
743
|
+
df['盈亏'] = df.apply(lambda x: x['商品毛利'] - x['花费'], axis=1)
|
744
|
+
return df
|
745
|
+
|
497
746
|
def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
|
498
747
|
index=False, header=True, st_ascend=None, ascend=None, freq=None):
|
499
748
|
"""
|
@@ -592,20 +841,21 @@ def data_aggregation_one(service_databases=[{}], months=1):
|
|
592
841
|
|
593
842
|
# 从数据库中获取数据, 返回包含 df 数据的字典
|
594
843
|
# 单独处理某一个聚合数据库,在这里修改添加 data_dict 的值
|
844
|
+
######################################################
|
845
|
+
################# 修改这里 ##########################
|
846
|
+
######################################################
|
595
847
|
data_dict = [
|
596
848
|
{
|
597
849
|
'数据库名': '聚合数据',
|
598
|
-
'集合名': '
|
599
|
-
'唯一主键': ['日期', '
|
600
|
-
'数据主体': sdq.
|
601
|
-
},
|
602
|
-
{
|
603
|
-
'数据库名': '聚合数据',
|
604
|
-
'集合名': '天猫_店铺来源_日数据',
|
605
|
-
'唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
|
606
|
-
'数据主体': sdq.dplyd(),
|
850
|
+
'集合名': '京东_sku_商品明细',
|
851
|
+
'唯一主键': ['日期', '商品id', '成交单量'],
|
852
|
+
'数据主体': sdq.sku_sales(),
|
607
853
|
},
|
608
854
|
]
|
855
|
+
######################################################
|
856
|
+
################# 修改这里 ##########################
|
857
|
+
######################################################
|
858
|
+
|
609
859
|
for items in data_dict: # 遍历返回结果
|
610
860
|
db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
|
611
861
|
df = g.groupby(df=df, table_name=table_name, is_maximize=True) # 2. 聚合数据
|
@@ -680,6 +930,30 @@ def data_aggregation(service_databases=[{}], months=1):
|
|
680
930
|
'唯一主键': ['款号'],
|
681
931
|
'数据主体': sdq.sp_cost(),
|
682
932
|
},
|
933
|
+
{
|
934
|
+
'数据库名': '聚合数据',
|
935
|
+
'集合名': '京东_京准通',
|
936
|
+
'唯一主键': ['日期', '产品线', '触发sku id', '跟单sku id', '花费', ],
|
937
|
+
'数据主体': sdq.jdjzt(),
|
938
|
+
},
|
939
|
+
{
|
940
|
+
'数据库名': '聚合数据',
|
941
|
+
'集合名': '京东_sku_商品明细',
|
942
|
+
'唯一主键': ['日期', '商品id', '成交单量'],
|
943
|
+
'数据主体': sdq.sku_sales(),
|
944
|
+
},
|
945
|
+
{
|
946
|
+
'数据库名': '聚合数据',
|
947
|
+
'集合名': '天猫_人群报表',
|
948
|
+
'唯一主键': ['日期', '推广渠道', '营销场景', '商品id', '花费', '人群名字'],
|
949
|
+
'数据主体': sdq.tg_rqbb(),
|
950
|
+
},
|
951
|
+
{
|
952
|
+
'数据库名': '聚合数据',
|
953
|
+
'集合名': '天猫_关键词报表',
|
954
|
+
'唯一主键': ['日期', '推广渠道', '营销场景', '商品id', '花费', '词类型', '词名字/词包名字',],
|
955
|
+
'数据主体': sdq.tg_gjc(),
|
956
|
+
},
|
683
957
|
]
|
684
958
|
for items in data_dict: # 遍历返回结果
|
685
959
|
db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
|
@@ -711,14 +985,22 @@ def data_aggregation(service_databases=[{}], months=1):
|
|
711
985
|
icm_update=['日期', '商品id'], # 设置唯一主键
|
712
986
|
service_database=service_database,
|
713
987
|
)
|
988
|
+
res = g.performance_jd(jd_tg=False) # 盈亏表,依赖其他表,单独做
|
989
|
+
m.df_to_mysql(
|
990
|
+
df=res,
|
991
|
+
db_name='聚合数据',
|
992
|
+
table_name='_京东_推广商品销售',
|
993
|
+
drop_duplicates=False,
|
994
|
+
icm_update=['日期', '跟单sku id', '货号', '花费'], # 设置唯一主键
|
995
|
+
service_database=service_database,
|
996
|
+
)
|
714
997
|
|
715
998
|
# 这里要注释掉,不然 copysh.py 可能有问题,这里主要修改配置文件,后续触发 home_lx 的 optimize_datas.py(有s)程序进行全局清理
|
716
999
|
# optimize_data.op_data(service_databases=service_databases, days=3650) # 立即启动对聚合数据的清理工作
|
717
1000
|
|
718
1001
|
|
719
1002
|
if __name__ == '__main__':
|
720
|
-
|
721
|
-
data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)
|
1003
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=1) # 正常的聚合所有数据
|
1004
|
+
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
722
1005
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
723
1006
|
|
724
|
-
|
@@ -46,41 +46,44 @@ class QueryDatas:
|
|
46
46
|
|
47
47
|
self.config.update({'database': db_name})
|
48
48
|
connection = pymysql.connect(**self.config) # 重新连接数据库
|
49
|
-
try:
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
49
|
+
# try:
|
50
|
+
with connection.cursor() as cursor:
|
51
|
+
# 3. 获取数据表的所有列信息
|
52
|
+
sql = 'SELECT `COLUMN_NAME` FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
|
53
|
+
cursor.execute(sql, (db_name, {table_name}))
|
54
|
+
columns = cursor.fetchall()
|
55
|
+
cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
|
56
|
+
|
57
|
+
# 4. 构建 SQL 查询语句
|
58
|
+
if projection: # 获取指定列
|
59
|
+
columns_in = []
|
60
|
+
for key, value in projection.items():
|
61
|
+
if value == 1 and key in cols_exist:
|
62
|
+
columns_in.append(key) # 提取值为 1 的键并清理不在数据表的键
|
63
|
+
columns_in = [f"`{item}`" for item in columns_in]
|
64
|
+
columns_in = ', '.join(columns_in)
|
65
|
+
if '日期' in cols_exist: # 不论是否指定, 只要数据表有日期,则执行
|
66
|
+
sql = (f"SELECT {columns_in} FROM `{db_name}`.`{table_name}` "
|
67
|
+
f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
|
68
|
+
else: # 数据表没有日期列时,返回指定列的所有数据
|
69
|
+
sql = f"SELECT {columns_in} FROM `{db_name}`.`{table_name}`"
|
70
|
+
else: # 没有指定获取列时
|
71
|
+
if '日期' in cols_exist: # 但数据表有日期,仍然执行
|
72
|
+
cols_exist = [f"`{item}`" for item in cols_exist]
|
73
|
+
columns_in = ', '.join(cols_exist)
|
74
|
+
sql = (f"SELECT {columns_in} FROM `{db_name}`.`{table_name}` "
|
75
|
+
f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
|
76
|
+
else: # 没有指定获取列,且数据表也没有日期列,则返回全部列的全部数据
|
77
|
+
all_col = ', '.join([f"`{item}`" for item in cols_exist if item != 'id'])
|
78
|
+
sql = f"SELECT %s FROM `%s`.`%s`" % (all_col, db_name, table_name)
|
79
|
+
cursor.execute(sql)
|
80
|
+
rows = cursor.fetchall() # 获取查询结果
|
81
|
+
columns = [desc[0] for desc in cursor.description]
|
82
|
+
df = pd.DataFrame(rows, columns=columns) # 转为 df
|
83
|
+
# except Exception as e:
|
84
|
+
# print(f'{e}')
|
82
85
|
return df
|
83
|
-
finally:
|
86
|
+
# finally:
|
84
87
|
connection.close()
|
85
88
|
|
86
89
|
if len(df) == 0:
|
@@ -1,131 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 –*-
|
2
|
-
import platform
|
3
|
-
import getpass
|
4
|
-
import socket
|
5
|
-
import configparser
|
6
|
-
import os
|
7
|
-
import sys
|
8
|
-
from posixpath import dirname
|
9
|
-
from urllib import parse
|
10
|
-
from mdbq.config import set_support
|
11
|
-
|
12
|
-
|
13
|
-
class MyConf:
|
14
|
-
"""
|
15
|
-
读取配置文件信息
|
16
|
-
"""
|
17
|
-
def __init__(self, path='support'):
|
18
|
-
self.top_path = os.path.realpath(os.path.dirname(sys.argv[0])) # 程序运行目录, 打包时使用
|
19
|
-
self.conf_file = os.path.join(self.top_path, path, '.my_conf')
|
20
|
-
self.config = None
|
21
|
-
|
22
|
-
def get_myconf(self, options: list):
|
23
|
-
if not os.path.exists(self.conf_file):
|
24
|
-
print(f'尚未配置: 缺少 .my_conf 文件')
|
25
|
-
return
|
26
|
-
if not options:
|
27
|
-
print(f'传入的参数为空: {options}')
|
28
|
-
return
|
29
|
-
self.config = configparser.ConfigParser()
|
30
|
-
self.config.read(self.conf_file, 'UTF-8')
|
31
|
-
results = []
|
32
|
-
for option in options:
|
33
|
-
try:
|
34
|
-
results.append(self.config.get('database', option))
|
35
|
-
except configparser.NoOptionError:
|
36
|
-
results.append('')
|
37
|
-
return results
|
38
|
-
|
39
|
-
|
40
|
-
def select_config_values(target_service, database, path=None):
|
41
|
-
"""
|
42
|
-
target_service: 指向: home_lx, aliyun
|
43
|
-
database: 指向: mongodb, mysql
|
44
|
-
"""
|
45
|
-
if not path:
|
46
|
-
path = set_support.SetSupport(dirname='support').dirname
|
47
|
-
|
48
|
-
m = MyConf(path=path)
|
49
|
-
options = []
|
50
|
-
if target_service == 'home_lx': # 1. 家里笔记本
|
51
|
-
if database == 'mongodb':
|
52
|
-
if socket.gethostname() == 'xigua_lx':
|
53
|
-
# 本机自身运行使用 127.0.0.1
|
54
|
-
options = ['username_db_lx_nw', 'password_db_lx_nw', 'host_bd', 'port_db_lx_nw',]
|
55
|
-
elif socket.gethostname() == 'xigua1' or socket.gethostname() == 'MacBook-Pro':
|
56
|
-
# 内网地址:正在运行的是 家里笔记本或者台式机,或者 macbook pro
|
57
|
-
options = ['username_db_lx_nw', 'password_db_lx_nw', 'host_db_lx_nw', 'port_db_lx_nw',]
|
58
|
-
else:
|
59
|
-
options = ['username_db_lx', 'password_db_lx', 'host_db_lx', 'port_db_lx']
|
60
|
-
|
61
|
-
elif database == 'mysql':
|
62
|
-
if socket.gethostname() == 'xigua_lx':
|
63
|
-
# 本机自身运行使用 127.0.0.1
|
64
|
-
options = ['username_mysql_lx_nw', 'password_mysql_lx_nw', 'host_bd', 'port_mysql_lx_nw',]
|
65
|
-
elif socket.gethostname() == 'xigua1' or socket.gethostname() == 'MacBook-Pro':
|
66
|
-
# 内网地址:正在运行的是 家里笔记本或者台式机,或者 macb ook pro
|
67
|
-
options = ['username_mysql_lx_nw', 'password_mysql_lx_nw', 'host_mysql_lx_nw', 'port_mysql_lx_nw',]
|
68
|
-
else:
|
69
|
-
options = ['username_mysql_lx', 'password_mysql_lx', 'host_mysql_lx', 'port_mysql_lx']
|
70
|
-
|
71
|
-
elif target_service == 'home_xigua1':
|
72
|
-
if database == 'mongodb':
|
73
|
-
print('未配置')
|
74
|
-
elif database == 'mysql':
|
75
|
-
if socket.gethostname() == 'xigua_lx':
|
76
|
-
# 本机自身运行使用 127.0.0.1
|
77
|
-
options = ['username_mysql_xigua1_nw', 'password_mysql_xigua1_nw', 'host_mysql_xigua1_nw', 'port_mysql_xigua1_nw',]
|
78
|
-
elif socket.gethostname() == 'xigua1' or socket.gethostname() == 'macbook pro':
|
79
|
-
# 内网地址:正在运行的是 家里笔记本或者台式机,或者 macb ook pro
|
80
|
-
options = ['username_mysql_xigua1_nw', 'password_mysql_xigua1_nw', 'host_bd', 'port_mysql_xigua1_nw',]
|
81
|
-
else:
|
82
|
-
print('未配置')
|
83
|
-
options = ['', '', '', '']
|
84
|
-
|
85
|
-
elif target_service == 'aliyun': # 2. 阿里云服务器
|
86
|
-
if database == 'mongodb':
|
87
|
-
if socket.gethostname() == 'xigua-cloud':
|
88
|
-
# 阿里云自身运行使用 127.0.0.1
|
89
|
-
options = ['username_db_aliyun', 'password_db_aliyun', 'host_bd', 'port_db_aliyun', ]
|
90
|
-
else:
|
91
|
-
options = ['username_db_aliyun', 'password_db_aliyun', 'host_db_aliyun', 'port_db_aliyun', ]
|
92
|
-
elif database == 'mysql':
|
93
|
-
if socket.gethostname() == 'xigua-cloud':
|
94
|
-
# 阿里云自身运行使用 127.0.0.1
|
95
|
-
options = ['username_mysql_aliyun', 'password_mysql_aliyun', 'host_bd', 'port_mysql_aliyun', ]
|
96
|
-
else:
|
97
|
-
options = ['username_mysql_aliyun', 'password_mysql_aliyun', 'host_mysql_aliyun', 'port_mysql_aliyun', ]
|
98
|
-
|
99
|
-
elif target_service == 'company': # 3. 公司台式机
|
100
|
-
if database == 'mongodb':
|
101
|
-
options = ['username_db_company_nw', 'password_db_company_nw', 'host_db_company_nw', 'port_db_company_nw', ]
|
102
|
-
elif database == 'mysql':
|
103
|
-
options = ['username_mysql_company_nw', 'password_mysql_company_nw', 'host_mysql_company_nw', 'port_mysql_company_nw', ]
|
104
|
-
|
105
|
-
elif target_service == 'nas': # 4. 群晖
|
106
|
-
if database == 'mysql':
|
107
|
-
options = ['username_mysql_nas_nw', 'password_mysql_nas_nw', 'host_mysql_nas_nw', 'port_mysql_nas_nw', ]
|
108
|
-
|
109
|
-
value = m.get_myconf(options=options)
|
110
|
-
if not value:
|
111
|
-
return '', '', '', 0
|
112
|
-
if database == 'mongodb': # mongodb 特殊字符要转码, mysql 不需要转
|
113
|
-
username = parse.quote_plus(str(value[0]).strip()) # 对可能存在的特殊字符进行编码
|
114
|
-
password = parse.quote_plus(str(value[1]).strip()) # 如果密码含有 @、/ 字符,一定要进行编码
|
115
|
-
else:
|
116
|
-
username = str(value[0]).strip()
|
117
|
-
password = str(value[1]).strip()
|
118
|
-
host = str(value[2]).strip()
|
119
|
-
port = int(value[3])
|
120
|
-
return username, password, host, port
|
121
|
-
|
122
|
-
|
123
|
-
def main():
|
124
|
-
pass
|
125
|
-
|
126
|
-
|
127
|
-
if __name__ == '__main__':
|
128
|
-
# main()
|
129
|
-
r, d, s, g = select_config_values(target_service='home_lx', database='mysql')
|
130
|
-
print(r, d, s, g, type(r), type(d), type(s), type(g))
|
131
|
-
print(f'本机: {platform.system()} // {socket.gethostname()}')
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|