mdbq-1.2.5-py3-none-any.whl → mdbq-1.2.6-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
mdbq/aggregation/aggregation.py CHANGED
@@ -850,6 +850,8 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
  if len(df) == 0:
      continue
+ # if '新版' not in name:
+ #     continue
  cv = converter.DataFrameConverter()
  df = cv.convert_df_cols(df=df)  # clean the column names and strip illegal characters from the df
  try:
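The convert_df_cols call above normalizes column names before each frame is written onward. The converter itself is not part of this diff, so the following is only a minimal sketch of that kind of cleanup, under an assumed regex rule (convert_df_cols_sketch and its pattern are illustrative, not mdbq's actual logic):

import re
import pandas as pd

def convert_df_cols_sketch(df: pd.DataFrame) -> pd.DataFrame:
    # assumed rule: replace characters that are awkward in SQL identifiers with '_'
    df.columns = [re.sub(r'[\s()\-/:]', '_', str(c)).strip('_') for c in df.columns]
    return df

df = pd.DataFrame({'店铺来源 (新版)': [1]})           # hypothetical messy header
print(convert_df_cols_sketch(df).columns.tolist())  # ['店铺来源__新版']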
@@ -973,5 +975,13 @@ if __name__ == '__main__':
  # target_service='home_lx',
  # database='mysql'
  # )
+ # db_name = '生意参谋2'
+ # table_name = '店铺来源_日数据_新版'
+ # upload_dir(
+ #     path='/Users/xigua/数据中心/原始文件2/生意参谋/流量来源',
+ #     db_name=db_name,
+ #     collection_name=table_name,
+ #     dbs={'mysql': True, 'mongodb': False},
+ # )

  test2()
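Uncommented, the block added above amounts to the following call; the values are the ones from the diff, and the import path is an assumption based on the package layout:

from mdbq.aggregation.aggregation import upload_dir  # assumed import path

upload_dir(
    path='/Users/xigua/数据中心/原始文件2/生意参谋/流量来源',
    db_name='生意参谋2',
    collection_name='店铺来源_日数据_新版',
    dbs={'mysql': True, 'mongodb': False},  # write to MySQL only, skip MongoDB
)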
mdbq/aggregation/query_data.py CHANGED
@@ -240,6 +240,7 @@ class GroupBy:
      '总成交金额': '成交金额'
  }, inplace=True)
  df = df.astype({
+     '商品id': str,
      '花费': float,
      '展现量': int,
      '点击量': int,
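Casting '商品id' to str here (and likewise '宝贝id'/'商品id' in the later hunks) guards against pandas inferring long numeric IDs as float64, which silently loses precision above 2**53 and breaks merges on the ID column. A small self-contained illustration:

import pandas as pd

# a long numeric ID inferred as float64 is already mangled:
df = pd.DataFrame({'商品id': pd.Series([652047446977001234], dtype='float64')})
print(df['商品id'].iloc[0])   # prints in scientific notation; the low digits are gone

# casting to str up front keeps the ID exact and merge-safe:
df2 = pd.DataFrame({'商品id': [652047446977001234]}).astype({'商品id': str})
print(df2['商品id'].iloc[0])  # '652047446977001234'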
@@ -263,12 +264,13 @@ class GroupBy:
      )
  else:
      df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
-         **{'加购量': ('加购量', np.min),
-         '成交笔数': ('成交笔数', np.min),
-         '成交金额': ('成交金额', np.min),
-         '自然流量曝光量': ('自然流量曝光量', np.min),
-         '直接成交笔数': ('直接成交笔数', np.max),
-         '直接成交金额': ('直接成交金额', np.max)
+         **{
+             '加购量': ('加购量', np.min),
+             '成交笔数': ('成交笔数', np.min),
+             '成交金额': ('成交金额', np.min),
+             '自然流量曝光量': ('自然流量曝光量', np.min),
+             '直接成交笔数': ('直接成交笔数', np.max),
+             '直接成交金额': ('直接成交金额', np.max)
          }
      )
  df.insert(loc=1, column='推广渠道', value='万相台无界版')  # insert a new column into the df
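The reformatted block above is pandas named aggregation: each output column maps to a (source column, reducer) pair, and the **{...} unpacking lets the output names come from arbitrary strings rather than Python keyword syntax. A runnable miniature of the same pattern:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    '日期': ['2024-01-01', '2024-01-01'],
    '商品id': ['101', '101'],
    '加购量': [3, 5],
    '直接成交金额': [10.0, 12.0],
})
out = df.groupby(['日期', '商品id'], as_index=False).agg(
    **{
        '加购量': ('加购量', np.min),              # keep the smallest duplicate
        '直接成交金额': ('直接成交金额', np.max),  # keep the largest duplicate
    }
)
print(out)  # one row: 加购量=3, 直接成交金额=12.0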
@@ -276,10 +278,10 @@ class GroupBy:
  **{
      '花费': ('花费', np.sum),
      '成交笔数': ('成交笔数', np.max),
-     '成交金额': ('成交金额', np.max),
-     '自然流量曝光量': ('自然流量曝光量', np.max),
-     '直接成交笔数': ('直接成交笔数', np.max),
-     '直接成交金额': ('直接成交金额', np.max)
+     '成交金额': ('成交金额', np.max),
+     '自然流量曝光量': ('自然流量曝光量', np.max),
+     '直接成交笔数': ('直接成交笔数', np.max),
+     '直接成交金额': ('直接成交金额', np.max)
  }
  )
  self.data_tgyj.update(
@@ -290,6 +292,7 @@ class GroupBy:
  return df
  elif '宝贝指标' in table_name:
      """ Do not include 商家编码 in the aggregation: some codes are blank and some are 0 """
+     df['宝贝id'] = df['宝贝id'].astype(str)
      df.fillna(0, inplace=True)
      # df = df[(df['销售额'] != 0) | (df['退款额'] != 0)]  # commented out: 生意经 is later used as the baseline when merging the promotion table, so every 商品id must be kept
      df = df.groupby(['日期', '宝贝id', '行业类目'], as_index=False).agg(
@@ -320,6 +323,7 @@ class GroupBy:
  elif '店铺来源_日数据' in table_name:
      return df
  elif '商品id编码表' in table_name:
+     df['宝贝id'] = df['宝贝id'].astype(str)
      df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
      # df['行业类目'] = df['行业类目'].apply(lambda x: re.sub(' ', '', x))
      try:
@@ -359,6 +363,7 @@ class GroupBy:
      table_name: df[['商品id', '商品图片']],
  }
  )
+ df['商品id'] = df['商品id'].astype(str)
  return df
  elif '商品成本' in table_name:
      df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
@@ -373,7 +378,7 @@ class GroupBy:
      print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
      return pd.DataFrame({})

- @try_except
+ # @try_except
  def performance(self, bb_tg=True):
      # print(self.data_tgyj)
      tg, syj, idbm, pic, cost = (
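Commenting out @try_except makes performance raise instead of swallowing errors, the usual move while debugging a pipeline step. mdbq's own decorator is not shown in this diff; a typical guard of this kind looks roughly like the following sketch, assuming log-and-continue behavior:

import functools

def try_except(func):
    # assumed behavior: log the failure and keep the batch job running
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f'{func.__name__} failed: {e}')
            return None
    return wrapper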
@@ -390,13 +395,14 @@ class GroupBy:
  df = pd.merge(tg, df, how='left', left_on='商品id', right_on='宝贝id')
  df.drop(labels='宝贝id', axis=1, inplace=True)
  if bb_tg is True:
-     # 生意经 merged with the promotion table: the complete table, covering all promotion and sales data for the whole store
+     # 生意经 merged with the promotion table: the complete table, covering all promotion and sales data for the whole store
      df = pd.merge(syj, df, how='left', left_on=['日期', '宝贝id'], right_on=['日期', '商品id'])
  else:
      # promotion table merged with 生意经: promotion data is the baseline, so the sales data is incomplete
      df = pd.merge(df, syj, how='left', left_on=['日期', '商品id'], right_on=['日期', '宝贝id'])
  df.drop(labels='宝贝id', axis=1, inplace=True)
  df.drop_duplicates(subset=['日期', '商品id', '花费', '销售额'], keep='last', inplace=True, ignore_index=True)
+ df['成本价'] = df['成本价'].astype('float64')
  df['商品成本'] = df.apply(lambda x: (x['成本价'] + x['销售额']/x['销售量'] * 0.11 + 6) * x['销售量'] if x['销售量'] > 0 else 0, axis=1)
  df['商品毛利'] = df.apply(lambda x: x['销售额'] - x['商品成本'], axis=1)
  df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
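The new astype('float64') ensures '成本价' is numeric before it enters the cost formula; values read back from a database can arrive as strings or Decimals, and a string '成本价' would make the + concatenate or raise. The formula prices each unit at cost price plus 11% of the average selling price plus a flat 6 (the two constants presumably cover fees and fixed per-unit costs; the diff does not say). Worked through with hypothetical numbers:

成本价, 销售额, 销售量 = 20.0, 300.0, 3

商品成本 = (成本价 + 销售额 / 销售量 * 0.11 + 6) * 销售量
print(商品成本)                                 # (20 + 11 + 6) * 3 = 111.0
print(销售额 - 商品成本)                        # 商品毛利: 189.0
print(round((销售额 - 商品成本) / 销售额, 4))   # 毛利率: 0.63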
@@ -493,12 +499,13 @@ def data_aggregation(service_databases=[{}]):
  for service_database in service_databases:
      for service_name, database in service_database.items():
          sdq = MysqlDatasQuery(target_service=service_name)  # instantiate the data-processing class
-         sdq.months = 1  # set the data window; 1 means the last 2 months
+         sdq.months = 0  # set the data window; 1 means the last 2 months
          g = GroupBy()  # instantiate the aggregation class
          # instantiate the database connection
          username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
          m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)

+         # fetch data from the database; returns dicts containing the df data
          data_dict = [
              {
                  '数据库名': '聚合数据',
@@ -531,19 +538,19 @@ def data_aggregation(service_databases=[{}]):
          '数据主体': sdq.sp_cost(),
      },
  ]
- for items in data_dict:
+ for items in data_dict:  # iterate over the returned results
      db_name, table_name, df = items['数据库名'], items['集合名'], items['数据主体']
      df = g.groupby(df=df, table_name=table_name, is_maximize=True)  # 2. aggregate the data
      # g.as_csv(df=df, filename=table_name + '.csv')
-     m.df_to_mysql(df=df, db_name=db_name, table_name=table_name)  # 3. write back to the database
+     m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, drop_dup=True)  # 3. write back to the database
  res = g.performance(bb_tg=True)  # profit-and-loss table; depends on the other tables, so built separately
- m.df_to_mysql(df=res, db_name='聚合数据', table_name='_全店商品销售')
+ m.df_to_mysql(df=res, db_name='聚合数据', table_name='_全店商品销售', drop_dup=True)
  res = g.performance(bb_tg=False)  # profit-and-loss table; depends on the other tables, so built separately
- m.df_to_mysql(df=res, db_name='聚合数据', table_name='_推广商品销售')
+ m.df_to_mysql(df=res, db_name='聚合数据', table_name='_推广商品销售', drop_dup=True)

  # optimize_data.op_data(service_databases=service_databases, days=3650)  # immediately start cleanup of the aggregated data


if __name__ == '__main__':
-     data_aggregation(service_databases=[{'company': 'mysql'}])
+     data_aggregation(service_databases=[{'home_lx': 'mysql'}])
      # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # immediately start cleanup of the aggregated data
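For reference, data_aggregation iterates over a list of {service_name: database} mappings, so several targets can be queued in one call. The service names below are the ones appearing in this diff; the import path is an assumption based on the package layout:

from mdbq.aggregation.query_data import data_aggregation  # assumed import path

# one service per dict; several dicts can be queued in a single run:
data_aggregation(service_databases=[{'home_lx': 'mysql'}])
data_aggregation(service_databases=[{'home_lx': 'mysql'}, {'company': 'mysql'}])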
mdbq/mysql/mysql.py CHANGED
@@ -65,7 +65,7 @@ class MysqlUpload:
  Write the df into the database
  db_name: database name
  table_name: collection/table name
- df_sql: upload the whole table with df.to_sql
+ df_sql: upload the whole table with df.to_sql; does not deduplicate
  drop_duplicates: only for aggregated data; do not set this parameter in other cases
  drop_dup: when True, check for duplicate rows before inserting; otherwise upload directly
  filename: passed in to make it easier to locate the file that caused an error
@@ -153,7 +153,7 @@ class MysqlUpload:

  if df_sql:
      now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
-     print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
+     print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
      engine = create_engine(
          f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}")  # create the database engine
      df.to_sql(
@@ -186,7 +186,7 @@ class MysqlUpload:

  # 5. upsert the data
  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
- print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
+ print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')

  datas = df.to_dict(orient='records')
  for data in datas:
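Putting the documented parameters together, a hedged usage sketch of the upload path (credentials come from get_myconf as in query_data.py above; the table name here is hypothetical):

import pandas as pd
from mdbq.config import get_myconf
from mdbq.mysql import mysql

username, password, host, port = get_myconf.select_config_values(
    target_service='home_lx', database='mysql')
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)

df = pd.DataFrame({'日期': ['2024-01-01'], '商品id': ['101'], '花费': [1.5]})
# drop_dup=True: check for duplicate rows before inserting (per the docstring above);
# df_sql=True would instead bulk-load via df.to_sql without deduplicating
m.df_to_mysql(df=df, db_name='聚合数据', table_name='_测试表', drop_dup=True)  # hypothetical table name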
mdbq/mysql/s_query.py CHANGED
@@ -12,6 +12,7 @@ from sqlalchemy import create_engine
  import os
  import calendar
  from mdbq.config import get_myconf
+ from mdbq.dataframe import converter

  warnings.filterwarnings('ignore')
  """
@@ -84,6 +85,8 @@ class QueryDatas:

  if len(df) == 0:
      print(f'database: {db_name}, table: {table_name} 查询的数据为空')
+ cv = converter.DataFrameConverter()
+ df = cv.convert_df_cols(df)
  return df

  def columns_to_list(self, db_name, table_name, columns_name) -> list:
mdbq-1.2.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 1.2.5
+ Version: 1.2.6
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-1.2.6.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/aggregation.py,sha256=UKpV2PrazABh5dlOcmmhs6vfgMYxo9sk05SAn-fizSw,55402
+ mdbq/aggregation/aggregation.py,sha256=mBgIY7afloW8H5qoBy56vCabIQRxVvAhrRZgGbZUxFQ,55791
  mdbq/aggregation/df_types.py,sha256=rHLIgv82PJSFmDvXkZyOJAffXkFyyMyFO23w9tUt8EQ,7525
  mdbq/aggregation/mysql_types.py,sha256=umVixmbFZM63k-QhVWLvOuhcAde4P_oDKbdo8ry2O9w,10633
  mdbq/aggregation/optimize_data.py,sha256=jLAWtxPUuhpo4XTVrhKtT4xK3grs7r73ePQfLhxlu1I,779
- mdbq/aggregation/query_data.py,sha256=s1QjZIr0ZODgiqbH6Rh-7jtTY5JJXpx7o5lgE32rGqE,25249
+ mdbq/aggregation/query_data.py,sha256=fg_9OdNSwHbo9vhK1pAKOazHFHZfE9_rBxRyQIWJX9U,25694
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -24,8 +24,8 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/mysql.py,sha256=9lAUY6-AcLctvoqa7qFsNtWxBA4DBFNDzdkLJCkbr48,37528
- mdbq/mysql/s_query.py,sha256=4c24SwbqtnO33o8CgWlTQ_j8sZYl5BRIQkaD9CI-vTY,7901
+ mdbq/mysql/mysql.py,sha256=KvUQflP5sYOECTHOs2Fs9ABcQvgPCbBnAX2ZlE3JjgY,37544
+ mdbq/mysql/s_query.py,sha256=a33aYhW6gAnspIZfQ7l23ePln9-MD1f_ukypr5M0jd8,8018
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
  mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq-1.2.5.dist-info/METADATA,sha256=yz9ZMV0ZFZti3QcBY4w-eOocIVFRitvrGvXqMnWhcys,245
- mdbq-1.2.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- mdbq-1.2.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-1.2.5.dist-info/RECORD,,
+ mdbq-1.2.6.dist-info/METADATA,sha256=_s1z5j_Q_dSi4lrw46NcpwMlgz5TkZnndOmWp4290Mk,245
+ mdbq-1.2.6.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+ mdbq-1.2.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-1.2.6.dist-info/RECORD,,
mdbq-1.2.6.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.44.0)
+ Generator: setuptools (70.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any