mdbq 2.9.3__py3-none-any.whl → 2.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/aggregation/aggregation.py CHANGED
@@ -1218,7 +1218,7 @@ def one_file_to_mysql(file, db_name, table_name):
 
 
 def test():
-    path = r'C:\同步空间\BaiduSyncdisk\原始文件3\天猫推广报表\品销宝'
+    path = r'/Users/xigua/Downloads/DMP报表'
 
     results = []
     for root, dirs, files in os.walk(path, topdown=False):
@@ -1226,12 +1226,16 @@ def test():
             if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
                 # df = pd.read_excel(os.path.join(root, name), header=0)
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                results.append(df)
                 # print(name)
-                # if len(df) == 0:
-                #     continue
-                # # df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
-                df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-                df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
+                if len(df) == 0:
+                    continue
+                if '达摩盘消耗占比' in df.columns.tolist():
+                    print(name)
+                    df.pop('达摩盘消耗占比')
+                # df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
+                # df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
                 # for col in ['更新时间']:
                 #     if col not in df.columns.tolist():
                 #         print(name)
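Condensed, the rewritten test() is an in-place cleanup pass over a CSV tree: read each file, drop the unwanted 达摩盘消耗占比 column if present, and write the file back. A standalone sketch of the same pattern, assuming only os and pandas (the function name is hypothetical):

    import os
    import pandas as pd

    def drop_column_from_csvs(path, column='达摩盘消耗占比'):
        # rewrite, in place, every non-empty csv that still carries the unwanted column
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                if not name.endswith('.csv') or '~' in name:
                    continue
                file = os.path.join(root, name)
                df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
                if len(df) == 0 or column not in df.columns:
                    continue
                df.pop(column)  # removes the column, like df.drop(columns=[column], inplace=True)
                df.to_csv(file, encoding='utf-8_sig', index=False, header=True)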
@@ -1243,12 +1247,23 @@ def test():
                 #         os.remove(os.path.join(root, name))
    # results.append(df)
    # df = pd.concat(results)
-    # path = '/Users/xigua/Downloads/手淘搜索_本店引流词'
-    # filename = 'py_xg_手淘搜索_本店引流词_万里马官方旗舰店_2024-05_合并.csv'
+    # df.drop_duplicates(
+    #     subset=[
+    #         '日期',
+    #         '店铺名称',
+    #         '报表类型',
+    #         '消耗',
+    #         '展现量',
+    #         '点击量',
+    #     ], keep='last', inplace=True, ignore_index=True)
+    # df.fillna(0, inplace=True)
+    # for col in df.columns.tolist():
+    #     df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)
+    # path = '/Users/xigua/Downloads'
+    # filename = '品销宝_2024年_合并.csv'
    # df.to_csv(os.path.join(path, filename), encoding='utf-8_sig', index=False, header=True)
 
 
-
 if __name__ == '__main__':
    # username = 'root'
    # password = ''
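The commented-out recipe above documents the intended merge-and-dedupe pass. One detail worth keeping: it needs both fillna and the per-column apply because the files are read with na_filter=False, so blank cells arrive as empty strings, which fillna(0) does not touch. A small self-contained illustration:

    import pandas as pd

    df = pd.DataFrame({'日期': ['2024-01-01', '2024-01-01', '2024-01-02'],
                       '消耗': ['', 3.5, None]})
    # keep the last row per key combination and renumber the index from 0
    df.drop_duplicates(subset=['日期'], keep='last', inplace=True, ignore_index=True)
    df.fillna(0, inplace=True)  # catches the None, but not the empty string
    for col in df.columns.tolist():
        df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)  # catches ''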
@@ -1257,9 +1272,9 @@ if __name__ == '__main__':
    #
    # # upload one file to the database
    # one_file_to_mysql(
-    #     file=r'/Users/xi',
-    #     db_name='推广数据2',
-    #     table_name='超级直播',
+    #     file=r'/Users/xigua/Downloads/DMP报表_2024-10-23_2024-10-29.csv',
+    #     db_name='达摩盘3',
+    #     table_name='dmp人群报表',
    # )
 
    # test()
@@ -1267,10 +1282,10 @@ if __name__ == '__main__':
     col = 1
     if col:
         # upload a directory to the given database
-        db_name = '生意参谋3'
-        table_name = '商品排行'
+        db_name = '达摩盘3'
+        table_name = 'dmp人群报表'
         upload_dir(
-            path=r'C:\同步空间\BaiduSyncdisk\原始文件3\生意参谋\商品排行',
+            path=r'/Users/xigua/Downloads/DMP报表',
             db_name=db_name,
             collection_name=table_name,
         )
mdbq/aggregation/query_data.py CHANGED
@@ -6,6 +6,7 @@ from mdbq.mysql import mysql
 from mdbq.mysql import s_query
 from mdbq.aggregation import optimize_data
 from mdbq.config import myconfig
+from mdbq.config import products
 import datetime
 from dateutil.relativedelta import relativedelta
 import pandas as pd
@@ -15,6 +16,7 @@ import platform
 import getpass
 import json
 import os
+import time
 
 """
 The program downloads data from the database (calling s_query.py to download and clean it), then aggregates and cleans the data; it does not update database records;
@@ -855,8 +857,7 @@ class MysqlDatasQuery:
         df_crowd.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
         df_crowd.drop_duplicates(subset=['人群id',], keep='last', inplace=True, ignore_index=True)
         df_crowd.pop('日期')
-        # print(df_crowd)
-
+        df_crowd = df_crowd.astype({'人群id': 'int64'}, errors='ignore')
         projection = {}
         df_dmp = self.download.data_to_df(
             db_name='达摩盘3',
@@ -865,6 +866,7 @@ class MysqlDatasQuery:
             end_date=end_date,
             projection=projection,
         )
+        df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')
         df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
         df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗_元'], keep='last', inplace=True, ignore_index=True)
         df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
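Both new astype calls serve the pd.merge on the last line: the join key 人群id has to carry the same dtype on both sides, and errors='ignore' keeps the original column instead of raising when a value cannot be cast. A minimal reproduction of the failure the casts prevent:

    import pandas as pd

    df_dmp = pd.DataFrame({'人群id': ['1001', '1002'], '消耗_元': [10.0, 20.0]})
    df_crowd = pd.DataFrame({'人群id': [1001], '人群名称': ['A']})

    # merging an object key against an int64 key raises in recent pandas
    # ("You are trying to merge on object and int64 columns"), so align first:
    df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')
    df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
    print(df)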
@@ -1959,7 +1961,7 @@ class GroupBy:
         )
         return df
 
-    @try_except
+    # @try_except
     def performance_jd(self, jd_tg=True):
         jdtg, sku_sales = self.data_jdtg['京东_京准通'], self.data_jdtg['京东_sku_商品明细']
         jdtg = jdtg.groupby(['日期', '跟单sku_id'],
@@ -1973,6 +1975,7 @@ class GroupBy:
         df = df[['日期', '商品id', '货号', '成交单量', '成交金额', '成本价']]
         df['商品id'] = df['商品id'].astype(str)
         jdtg['跟单sku_id'] = jdtg['跟单sku_id'].astype(str)
+        jdtg = jdtg.astype({'日期': 'datetime64[ns]'}, errors='raise')
         if jd_tg is True:
             # the full table, covering all promotion and sales data for the whole shop
             df = pd.merge(df, jdtg, how='left', left_on=['日期', '商品id'], right_on=['日期', '跟单sku_id'])  # merge the promotion table into df
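This cast deliberately uses errors='raise', the opposite policy from the 人群id casts above: with @try_except commented out on performance_jd, an unparseable 日期 now aborts before the merge instead of silently surviving as text and matching nothing. For instance:

    import pandas as pd

    jdtg = pd.DataFrame({'日期': ['2024-10-23', '2024-10-24'], '跟单sku_id': ['1', '2']})
    jdtg = jdtg.astype({'日期': 'datetime64[ns]'}, errors='raise')  # ISO dates parse fine

    bad = pd.DataFrame({'日期': ['2024-10-23', 'not a date']})
    # bad.astype({'日期': 'datetime64[ns]'}, errors='raise')  # raises on the bad value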
@@ -2354,42 +2357,40 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
     )
 
 
-def main():
+def main(days=100, months=3):
     # refresh the date table
     date_table()
     # refresh the product launch-year baseline table, 属性设置 3 - 货品年份基准
     p = products.Products()
     p.to_mysql()
 
+    # aggregate the data
+    data_aggregation(
+        months=months,
+        is_juhe=True,  # build the aggregate tables
+        # less_dict=['天猫_品销宝账户报表'],  # aggregate a single database only
+    )
+    time.sleep(60)
+
     system = platform.system()  # local OS
     host_name = socket.gethostname()  # local hostname
     conf = myconfig.main()
     db_list = conf[system][host_name]['mysql']['数据库集']
-    db_list = [item for item in db_list if item != '聚合数据']
-    # clean all databases holding non-aggregate data
+    # clean all databases
     optimize_data.op_data(
         db_name_lists=db_list,
-        days=5,
-        is_mongo=True,
-        is_mysql=True,
-    )
-
-    # aggregate the data
-    query_data.data_aggregation(months=3)
-    time.sleep(60)
-
-    # clean the aggregate data; mongodb holds no aggregate data, so only mysql needs cleaning
-    optimize_data.op_data(
-        db_name_lists=['聚合数据'],
-        days=100,
+        days=days,
         is_mongo=False,
         is_mysql=True,
     )
 
 
 if __name__ == '__main__':
-    data_aggregation(
-        months=3,
-        is_juhe=True,  # build the aggregate tables
-        # less_dict=['天猫_品销宝账户报表'],  # aggregate a single database only
-    )
+    main(days=100, months=3)
+
+    # data_aggregation(
+    #     months=3,
+    #     is_juhe=True,  # build the aggregate tables
+    #     # less_dict=['天猫_品销宝账户报表'],  # aggregate a single database only
+    # )
+
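One carry-over worth flagging in this hunk's header: data_aggregation(months=1, is_juhe=True, less_dict=[]) keeps a mutable default argument. Python evaluates the default once, at definition time, so if the function ever mutates less_dict in place, the change persists across calls:

    def f(items=[]):        # a single list shared by every call
        items.append(1)
        return items

    print(f())  # [1]
    print(f())  # [1, 1] -- the default accumulated state

    def g(items=None):      # the usual fix
        items = [] if items is None else items
        items.append(1)
        return items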
mdbq/config/products.py CHANGED
@@ -124,25 +124,39 @@ class Products:
         ]
         self.datas += my_datas
 
+
     def to_mysql(self):
         self.update_my_datas()
         df = pd.DataFrame(self.datas)
-        m = mysql.MysqlUpload(
+        m_engine = mysql.MysqlUpload(
             username=username,
             password=password,
             host=host,
             port=port,
         )
-        m.df_to_mysql(
-            df=df,
-            db_name='属性设置3',
-            table_name='货品年份基准',
-            move_insert = False,
-            df_sql=False,  # when True, upload the whole table via df.to_sql, without deduplication
-            drop_duplicates=True,  # when True, check for duplicates before inserting (slower); otherwise upload directly
-            icm_update=[],
-            service_database=service_database,  # used to track processing progress
-        )
+        for dict_data in df.to_dict(orient='records'):
+            m_engine.dict_to_mysql(
+                db_name='属性设置3',
+                table_name='货品年份基准',
+                dict_data=dict_data,
+                # icm_update=['日期', '店铺名称', '宝贝id'],  # unique composite key
+                unique_main_key=['商品id'],
+                set_type={
+                    '商品id': 'mediumtext',
+                    '平台': 'mediumtext',
+                    '上市年份': 'mediumtext',
+                },
+            )
+        # m.df_to_mysql(
+        #     df=df,
+        #     db_name='属性设置3',
+        #     table_name='货品年份基准',
+        #     move_insert = False,
+        #     df_sql=False,  # when True, upload the whole table via df.to_sql, without deduplication
+        #     drop_duplicates=True,  # when True, check for duplicates before inserting (slower); otherwise upload directly
+        #     icm_update=[],
+        #     service_database=service_database,  # used to track processing progress
+        # )
 
     def market_date(self, product_id: int):
         try:
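df.to_dict(orient='records') is what turns the frame into the row dicts that dict_to_mysql consumes, one upsert per row keyed on 商品id. A quick look at the shape it produces:

    import pandas as pd

    df = pd.DataFrame({'商品id': [101, 102], '上市年份': ['2023年', '2024年']})
    for dict_data in df.to_dict(orient='records'):
        print(dict_data)
    # {'商品id': 101, '上市年份': '2023年'}
    # {'商品id': 102, '上市年份': '2024年'}

The trade-off versus the old df_to_mysql call is one INSERT per row instead of a bulk upload, in exchange for the per-row ON DUPLICATE KEY UPDATE semantics that dict_to_mysql builds (see the mysql.py hunks below).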
mdbq/mysql/mysql.py CHANGED
@@ -132,6 +132,7 @@ class MysqlUpload:
 
         return wrapper
 
+    @try_except
     def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_type=None):
         """
         Insert dict data
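try_except appears to be the module's own error-swallowing decorator (the return wrapper context line above is its tail); applying it means dict_to_mysql now typically reports failures and returns None rather than propagating exceptions to callers. Its full body is not part of this diff, so here is only a generic sketch of a decorator of this shape:

    import functools
    import traceback

    def try_except(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception:
                traceback.print_exc()  # report and continue instead of raising
        return wrapper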
@@ -294,10 +295,12 @@ class MysqlUpload:
             if unique_main_key:
                 for col in unique_main_key:
                     del dict_data[col]
-            update_datas = ', '.join([f'{k} = VALUES({k})' for k, v in dict_data.items()])
+            # column names must always be wrapped in backticks
+            update_datas = ', '.join([f'`{k}` = VALUES(`{k}`)' for k, v in dict_data.items()])
 
             # build the sql
             sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
+            # print(sql)
             cursor.execute(sql)
             connection.commit()  # commit to the database
             connection.close()
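The backtick fix hardens the UPDATE clause against column names that MySQL cannot accept unquoted, such as names containing spaces or punctuation, or names that collide with reserved words. A sketch of the statement the method now assembles (values are shown inline here only for illustration; passing them as placeholders lets the driver handle quoting and escaping):

    dict_data = {'商品id': 101, '上市年份': '2024年'}
    keys_data = ', '.join(f'`{k}`' for k in dict_data)
    values_data = ', '.join(['%s'] * len(dict_data))
    update_datas = ', '.join(f'`{k}` = VALUES(`{k}`)' for k in dict_data)
    sql = (f"INSERT INTO `货品年份基准` ({keys_data}) VALUES ({values_data}) "
           f"ON DUPLICATE KEY UPDATE {update_datas}")
    # cursor.execute(sql, tuple(dict_data.values()))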
@@ -329,6 +332,10 @@ class MysqlUpload:
                 count_int, count_float = count_decimal_places(v)  # detect a decimal and return the number of decimal places
                 if result1:  # JD sku/spu product info
                     __res_dict.update({k: 'mediumtext'})
+                elif k == '日期':
+                    __res_dict.update({k: 'DATE'})
+                elif k == '更新时间':
+                    __res_dict.update({k: 'TIMESTAMP'})
                 elif str(v) == '':
                     __res_dict.update({k: 'mediumtext'})
                 elif result2:  # decimal
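Branch order is doing real work here: the new 日期/更新时间 checks sit above the empty-string and decimal fallbacks, so a blank 更新时间 cell still maps to TIMESTAMP rather than mediumtext. The precedence, isolated (helper name hypothetical):

    def guess_mysql_type(k, v):
        # fixed, name-based types win before any value-based inference
        if k == '日期':
            return 'DATE'
        if k == '更新时间':
            return 'TIMESTAMP'
        if str(v) == '':
            return 'mediumtext'
        return 'mediumtext'  # stand-in for the remaining value-based branches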
@@ -418,6 +425,11 @@ class MysqlUpload:
         for service_name, database in service_database.items():
             # 2. convert column dtypes: map the df dtypes to mysql column types
             dtypes, cl, db_n, tb_n = self.convert_dtypes(df=df, db_name=db_name, table_name=table_name, path=json_path, service_name=service_name)
+            for dy in dtypes.keys():
+                if '日期' == dy:
+                    dtypes.update({'日期': 'DATE'})
+                if '更新时间' == dy:
+                    dtypes.update({'更新时间': 'TIMESTAMP'})
 
             # special characters here need no escaping
             sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
mdbq-2.9.3.dist-info/METADATA → mdbq-2.9.4.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.9.3
+Version: 2.9.4
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com
mdbq-2.9.3.dist-info/RECORD → mdbq-2.9.4.dist-info/RECORD
@@ -1,11 +1,11 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=IVh9SFO1yp12qDBuEOWTi9SAytYktKBrsPJNPuDetSM,73254
+mdbq/aggregation/aggregation.py,sha256=IJS5ILEmYlrepj2oX6TDuMjab5rYEOpQuYyTpgfRbR0,73747
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
 mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
-mdbq/aggregation/query_data.py,sha256=zut8WyyAKTULfGWMltyQYqsVsIaBDUU8E3w2_UL4hbA,103248
+mdbq/aggregation/query_data.py,sha256=w_p013oMdF6YovQwP6RY6wiPTKuuTfSn53Wo1RC_CD0,103372
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -18,7 +18,7 @@ mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
 mdbq/config/myconfig.py,sha256=EGymTlAimtHIDJ9egCtOehBEPOj6rea504kvsEZu64o,854
-mdbq/config/products.py,sha256=Sj4FSb2dZcMKp6ox-FJdIR87QLgMN_TJ7Z6KAWMTWyw,6214
+mdbq/config/products.py,sha256=sC4ctAiHR7ydkEXuIlvwvTPDLJXwengkG0hFWSQRFz0,6808
 mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
 mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
 mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
@@ -28,7 +28,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=tKkgjbOvy5uIn7Z-ws_biS-04-UHnr5rKqNvtWr_Yss,62024
+mdbq/mysql/mysql.py,sha256=5l5wFS6AFjXuZtb54JSnoqh_DlB3-tQd9LNac0CSYjQ,62538
 mdbq/mysql/recheck_mysql.py,sha256=jHQSlQy0PlQ_EYICQv_2nairUX3t6OIwPtSELKIpjkY,8702
 mdbq/mysql/s_query.py,sha256=bgNNIqYLDCHjD5KTFcm6x4u74selpAGs5ouJYuqX86k,8447
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
@@ -45,7 +45,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=jHrdGWBJQaSywx7V-U4YuM6vWkwC5SR5tTOOdB3YU_c,17306
-mdbq-2.9.3.dist-info/METADATA,sha256=fL1JR-lJNlMr2cIzQIEO460TetP9yzBerJPCJYnBRQ8,243
-mdbq-2.9.3.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
-mdbq-2.9.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-2.9.3.dist-info/RECORD,,
+mdbq-2.9.4.dist-info/METADATA,sha256=1xFHayCMA4H3FNPMIEdut-xWBAWiwooZZrD6p7blLQU,243
+mdbq-2.9.4.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.9.4.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.9.4.dist-info/RECORD,,
mdbq-2.9.3.dist-info/WHEEL → mdbq-2.9.4.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (70.1.0)
+Generator: bdist_wheel (0.44.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 