mdbq 2.5.7__tar.gz → 2.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {mdbq-2.5.7 → mdbq-2.5.9}/PKG-INFO +2 -2
  2. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/aggregation/optimize_data.py +41 -40
  3. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/clean/clean_upload.py +263 -47
  4. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/dataframe/converter.py +3 -3
  5. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/mysql/mysql.py +7 -5
  6. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/spider/aikucun.py +64 -7
  7. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq.egg-info/PKG-INFO +2 -2
  8. {mdbq-2.5.7 → mdbq-2.5.9}/setup.py +2 -2
  9. {mdbq-2.5.7 → mdbq-2.5.9}/README.txt +0 -0
  10. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/__init__.py +0 -0
  11. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/__version__.py +0 -0
  12. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/aggregation/__init__.py +0 -0
  13. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/aggregation/aggregation.py +0 -0
  14. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/aggregation/df_types.py +0 -0
  15. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/aggregation/mysql_types.py +0 -0
  16. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/aggregation/query_data.py +0 -0
  17. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/bdup/__init__.py +0 -0
  18. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/bdup/bdup.py +0 -0
  19. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/clean/__init__.py +0 -0
  20. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/clean/data_clean.py +0 -0
  21. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/company/__init__.py +0 -0
  22. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/company/copysh.py +0 -0
  23. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/company/home_sh.py +0 -0
  24. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/config/__init__.py +0 -0
  25. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/config/get_myconf.py +0 -0
  26. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/config/products.py +0 -0
  27. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/config/set_support.py +0 -0
  28. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/config/update_conf.py +0 -0
  29. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/dataframe/__init__.py +0 -0
  30. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/log/__init__.py +0 -0
  31. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/log/mylogger.py +0 -0
  32. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/mongo/__init__.py +0 -0
  33. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/mongo/mongo.py +0 -0
  34. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/mysql/__init__.py +0 -0
  35. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/mysql/s_query.py +0 -0
  36. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/mysql/year_month_day.py +0 -0
  37. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/other/__init__.py +0 -0
  38. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/other/porxy.py +0 -0
  39. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/other/pov_city.py +0 -0
  40. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/other/sku_picture.py +0 -0
  41. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/other/ua_sj.py +0 -0
  42. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/pbix/__init__.py +0 -0
  43. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/pbix/pbix_refresh.py +0 -0
  44. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/pbix/refresh_all.py +0 -0
  45. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/pbix/refresh_all_old.py +0 -0
  46. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/req_post/__init__.py +0 -0
  47. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/req_post/req_tb.py +0 -0
  48. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq/spider/__init__.py +0 -0
  49. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq.egg-info/SOURCES.txt +0 -0
  50. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq.egg-info/dependency_links.txt +0 -0
  51. {mdbq-2.5.7 → mdbq-2.5.9}/mdbq.egg-info/top_level.txt +0 -0
  52. {mdbq-2.5.7 → mdbq-2.5.9}/setup.cfg +0 -0
--- mdbq-2.5.7/PKG-INFO
+++ mdbq-2.5.9/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.5.7
-Home-page: https://pypi.org/project/mdbq
+Version: 2.5.9
+Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
 License: MIT
--- mdbq-2.5.7/mdbq/aggregation/optimize_data.py
+++ mdbq-2.5.9/mdbq/aggregation/optimize_data.py
@@ -57,8 +57,8 @@ def restart_mongodb():
     subprocess.call(command, shell=True)
 
 
-def op_data(db_name_lists, service_databases=None, days: int = 63, is_mongo=True, is_mysql=True):
-    """ service_databases: this parameter is unused for now """
+def op_data(db_name_lists, service_databases=[{'home_lx': 'mysql', 'home_lx': 'mongodb'}], days: int = 63, is_mongo=True, is_mysql=True):
+    """ """
     # for service_database in service_databases:
     #     for service_name, database in service_database.items():
     #         username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
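
Note: the new default value of service_databases is risky on two counts: the dict literal repeats the key 'home_lx', so only the last entry survives, and a mutable default argument is shared across calls. A minimal standalone sketch of the pitfall (not from the package):

    # duplicate keys in a dict literal collapse silently; only the last value is kept
    default = [{'home_lx': 'mysql', 'home_lx': 'mongodb'}]
    print(default)  # [{'home_lx': 'mongodb'}] -- the 'mysql' entry is gone

    # the usual safer pattern: default to None and build the list inside the function
    def op_data(db_name_lists, service_databases=None, days=63):
        if service_databases is None:
            service_databases = [{'home_lx': 'mysql'}, {'home_lx': 'mongodb'}]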
@@ -68,47 +68,48 @@ def op_data(db_name_lists, service_databases=None, days: int = 63, is_mongo=True
     # ]
     # s.days = days
     # s.optimize_list()
+    for service_database in service_databases:
+        for service_name, database in service_database.items():
+            if socket.gethostname() == 'xigua_lx' or socket.gethostname() == 'xigua1' or socket.gethostname() == 'Mac2.local':
+                # mongodb
+                if is_mongo and database == 'mongodb':
+                    username, password, host, port = get_myconf.select_config_values(
+                        target_service=service_name,
+                        database=database,
+                    )
+                    m = mongo.OptimizeDatas(username=username, password=password, host=host, port=port)
+                    m.db_name_lists = db_name_lists
+                    m.days = days
+                    m.optimize_list()
+                    if m.client:
+                        m.client.close()
+                        print(f'mongodb connection closed')
 
-    if socket.gethostname() == 'xigua_lx' or socket.gethostname() == 'xigua1' or socket.gethostname() == 'Mac2.local':
-        # mongodb
-        if is_mongo:
-            username, password, host, port = get_myconf.select_config_values(
-                target_service='home_lx',
-                database='mongodb',
-            )
-            m = mongo.OptimizeDatas(username=username, password=password, host=host, port=port)
-            m.db_name_lists = db_name_lists
-            m.days = days
-            m.optimize_list()
-            if m.client:
-                m.client.close()
-                print(f'mongodb connection closed')
+                if socket.gethostname() == 'xigua_lx':
+                    restart_mongodb()  # mongodb uses too much memory; restart the service to free it
 
-        if socket.gethostname() == 'xigua_lx':
-            restart_mongodb()  # mongodb uses too much memory; restart the service to free it
+                # Mysql
+                if is_mysql and database == 'mysql':
+                    username, password, host, port = get_myconf.select_config_values(
+                        target_service=service_name,
+                        database=database,
+                    )
+                    s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+                    s.db_name_lists = db_name_lists
+                    s.days = days
+                    s.optimize_list()
 
-        # Mysql
-        if is_mysql:
-            username, password, host, port = get_myconf.select_config_values(
-                target_service='home_lx',
-                database='mysql',
-            )
-            s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
-            s.db_name_lists = db_name_lists
-            s.days = days
-            s.optimize_list()
-
-    elif socket.gethostname() == 'company':
-        # Mysql
-        if is_mysql:
-            username, password, host, port = get_myconf.select_config_values(
-                target_service='company',
-                database='mysql',
-            )
-            s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
-            s.db_name_lists = db_name_lists
-            s.days = days
-            s.optimize_list()
+            elif socket.gethostname() == 'company':
+                # Mysql
+                if is_mysql and database == 'mysql':
+                    username, password, host, port = get_myconf.select_config_values(
+                        target_service=service_name,
+                        database=database,
+                    )
+                    s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+                    s.db_name_lists = db_name_lists
+                    s.days = days
+                    s.optimize_list()
 
 
 if __name__ == '__main__':
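
Note: op_data now reads credentials per entry of service_databases instead of hard-coding 'home_lx', and dispatches on both the host name and the database kind. A hedged example call (arguments are illustrative only):

    # one mapping per list entry avoids the duplicate-key collapse shown above
    op_data(
        db_name_lists=['天猫_生意参谋'],
        service_databases=[{'home_lx': 'mysql'}, {'home_lx': 'mongodb'}],
        days=63,
        is_mongo=True,
        is_mysql=True,
    )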
--- mdbq-2.5.7/mdbq/clean/clean_upload.py
+++ mdbq-2.5.9/mdbq/clean/clean_upload.py
@@ -83,6 +83,141 @@ class DataClean:
             os.makedirs(_save_paths, exist_ok=True)
         _df.to_csv(os.path.join(_save_paths, filenames), encoding=encoding, index=False, header=True)
 
+    def sycm_tm(self, path=None, is_except=[]):
+        """ Tmall Sycm (生意参谋) data """
+        if not path:
+            path = self.path
+        report_names = [
+            {
+                '文件简称': '商品排行',  # characters contained in the file name
+                '数据库名': '天猫_生意参谋',
+                '集合名称': '商品排行',
+            },
+            {
+                '文件简称': '店铺来源_来源构成_万里马官方旗舰店',  # characters contained in the file name
+                '数据库名': '天猫_生意参谋',
+                '集合名称': '店铺流量来源构成',
+            },
+        ]
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
+                    continue
+                if 'py_xg' in name:
+                    continue
+                is_continue = False
+                if is_except:
+                    for item in is_except:
+                        if item in os.path.join(root, name):
+                            # print(name)
+                            is_continue = True
+                            break
+                if is_continue:  # files or folders excluded from processing
+                    continue
+
+                # exclude non-target reports
+                is_continue = False
+                db_name = None  # initialize parameters
+                collection_name = None
+                for item in report_names:
+                    if item['文件简称'] in name:
+                        db_name = item['数据库名']
+                        collection_name = item['集合名称']
+                        is_continue = True
+                if not is_continue:
+                    continue
+                if name.endswith('.xls') and '商品排行_万里马官方旗舰店' in name:
+                    df = pd.read_excel(os.path.join(root, name), header=4)
+                    if len(df) == 0:
+                        print(f'{name} report data is empty')
+                        continue
+                    df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
+                    df.replace(to_replace=[','], value='', regex=True, inplace=True)
+                    df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
+                    shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
+                    df.insert(loc=1, column='店铺名称', value=shop_name)
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))
+                elif name.endswith('.csv') and '_来源构成_万里马官方旗舰店' in name:
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))
+
+                # pass the data into self.datas, pending upload to the database
+                if not db_name or not collection_name:
+                    # print(f'db_name/collection_name must not be empty')
+                    continue
+                self.datas.append(
+                    {
+                        '数据库名': db_name,
+                        '集合名称': collection_name,
+                        '数据主体': df,
+                        '文件名': name,
+                    }
+                )
+
+    def dmp_tm(self, path=None, is_except=[]):
+        """ Tmall DMP (达摩盘) """
+        if not path:
+            path = self.path
+        report_names = [
+            {
+                '文件简称': '我的人群属性',  # characters contained in the file name
+                '数据库名': '达摩盘3',
+                '集合名称': '我的人群属性',
+            },
+        ]
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
+                    continue
+                if 'py_xg' in name:
+                    continue
+                is_continue = False
+                if is_except:
+                    for item in is_except:
+                        if item in os.path.join(root, name):
+                            # print(name)
+                            is_continue = True
+                            break
+                if is_continue:  # files or folders excluded from processing
+                    continue
+
+                # exclude non-target reports
+                is_continue = False
+                db_name = None  # initialize parameters
+                collection_name = None
+                for item in report_names:
+                    if item['文件简称'] in name:
+                        db_name = item['数据库名']
+                        collection_name = item['集合名称']
+                        is_continue = True
+                if not is_continue:
+                    continue
+                if name.endswith('.csv') and '人群属性_万里马官方旗舰店' in name:  # promotion-type report
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    if len(df) == 0:
+                        print(f'{name} report data is empty')
+                        continue
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))
+
+                # pass the data into self.datas, pending upload to the database
+                if not db_name or not collection_name:
+                    # print(f'db_name/collection_name must not be empty')
+                    continue
+                self.datas.append(
+                    {
+                        '数据库名': db_name,
+                        '集合名称': collection_name,
+                        '数据主体': df,
+                        '文件名': name,
+                    }
+                )
+
     def tg_reports(self, path=None, is_except=[]):
         """ Process Tmall/Taobao promotion reports """
         if not path:
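
Note: both new cleaners pull the shop name out of the file name with a CJK-aware regex before inserting a 店铺名称 column. A standalone sketch of that extraction (hypothetical file name):

    import re

    name = '商品排行_万里马官方旗舰店_20240101.xls'  # hypothetical
    shop = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
    print(shop)  # 万里马官方旗舰店 -- a CJK/Latin run ending in 店 (shop), introduced by '_'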
@@ -172,7 +307,7 @@ class DataClean:
 
                 # exclude reports that are not promotion reports
                 is_continue = False
-                db_name = None
+                db_name = None  # initialize parameters
                 collection_name = None
                 for item in report_names:
                     if item['文件简称'] in name:
@@ -234,7 +369,7 @@ class DataClean:
 
                 # pass the data into self.datas, pending upload to the database
                 if not db_name or not collection_name:
-                    print(f'db_name/collection_name must not be empty')
+                    # print(f'db_name/collection_name must not be empty')
                     continue
                 self.datas.append(
                     {
@@ -290,7 +425,7 @@ class DataClean:
 
                 # exclude non-target reports
                 is_continue = False
-                db_name = None
+                db_name = None  # initialize parameters
                 collection_name = None
                 for item in report_names:
                     if item['文件简称'] in name:
@@ -303,7 +438,7 @@ class DataClean:
                 if name.endswith('.csv') and 'baobei' in name:
                     encoding = self.get_encoding(file_path=os.path.join(root, name))
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
-                    pattern = re.findall(r'-(\d{4})(\d{2})(\d{2}).csv', name)[0]
+                    pattern = re.findall(r'-(\d{4})(\d{2})(\d{2})\W', name)[0]
                     df['日期'] = '-'.join(pattern)
                     df.replace(to_replace=['--'], value='', regex=False, inplace=True)
                     new_name = f'py_xg_天猫_baobeitrains_{'-'.join(pattern)}.csv'
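
Note: the date pattern now ends in \W instead of an unescaped dot before csv, so browser re-downloads that get a ' (2)' suffix still match (incidentally, the nested single quotes inside the f'py_xg_..._{'-'.join(pattern)}.csv' lines require Python 3.12+). A standalone check with hypothetical names:

    import re

    old = r'-(\d{4})(\d{2})(\d{2}).csv'
    new = r'-(\d{4})(\d{2})(\d{2})\W'
    print(re.findall(new, 'baobeitrains-20240101.csv'))      # [('2024', '01', '01')]
    print(re.findall(new, 'baobeitrains-20240101 (2).csv'))  # [('2024', '01', '01')]
    print(re.findall(old, 'baobeitrains-20240101 (2).csv'))  # [] -- the old pattern missed re-downloads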
@@ -327,7 +462,7 @@ class DataClean:
                 elif name.endswith('.csv') and '省份城市分析' in name:
                     encoding = self.get_encoding(file_path=os.path.join(root, name))
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
-                    pattern = re.findall(r'(.*[\u4e00-\u9fa5])(\d{4})(\d{2})(\d{2})\.', name)[0]
+                    pattern = re.findall(r'(.*[\u4e00-\u9fa5])(\d{4})(\d{2})(\d{2})\W', name)[0]  # note: there may be a trailing parenthesis, e.g. ...27 (2).csv
                     date = '-'.join(pattern[1:])
                     new_name = f'py_xg_天猫_{pattern[0]}-{date}.csv'
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
@@ -350,9 +485,9 @@ class DataClean:
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.csv') and '店铺销售指标' in name:
                     # Syj (生意经) shop metrics; monthly data only, though daily metrics work too
-                    name_st = re.findall(r'(.*)\(分日', name)
+                    name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
                     if not name_st:
-                        print(f'{name} table already converted')
+                        print(f'{name} regex failed to extract the file name')
                         continue
                     encoding = self.get_encoding(file_path=os.path.join(root, name))
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
@@ -375,7 +510,7 @@ class DataClean:
 
                 # pass the data into self.datas, pending upload to the database
                 if not db_name or not collection_name:
-                    print(f'db_name/collection_name must not be empty')
+                    # print(f'db_name/collection_name must not be empty')
                     continue
                 self.datas.append(
                     {
@@ -431,7 +566,7 @@ class DataClean:
 
                 # exclude non-target reports
                 is_continue = False
-                db_name = None
+                db_name = None  # initialize parameters
                 collection_name = None
                 for item in report_names:
                     if item['文件简称'] in name:
@@ -444,7 +579,7 @@ class DataClean:
                 if name.endswith('.csv') and 'baobei' in name:
                     encoding = self.get_encoding(file_path=os.path.join(root, name))
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
-                    pattern = re.findall(r'-(\d{4})(\d{2})(\d{2}).csv', name)[0]
+                    pattern = re.findall(r'-(\d{4})(\d{2})(\d{2})\W', name)[0]
                     df['日期'] = '-'.join(pattern)
                     df.replace(to_replace=['--'], value='', regex=False, inplace=True)
                     new_name = f'py_xg_淘宝_baobeitrains_{'-'.join(pattern)}.csv'
@@ -516,7 +651,7 @@ class DataClean:
 
                 # pass the data into self.datas, pending upload to the database
                 if not db_name or not collection_name:
-                    print(f'db_name/collection_name must not be empty')
+                    # print(f'db_name/collection_name must not be empty')
                     continue
                 self.datas.append(
                     {
@@ -582,7 +717,7 @@ class DataClean:
 
                 # exclude non-target reports
                 is_continue = False
-                db_name = None
+                db_name = None  # initialize parameters
                 collection_name = None
                 for item in report_names:
                     if item['文件简称'] in name:
@@ -633,7 +768,7 @@ class DataClean:
 
                 # pass the data into self.datas, pending upload to the database
                 if not db_name or not collection_name:
-                    print(f'db_name/collection_name must not be empty')
+                    # print(f'db_name/collection_name must not be empty')
                     continue
                 # print(name)
                 self.datas.append(
@@ -663,6 +798,8 @@ class DataClean:
                             break
                 if is_continue:  # files or folders excluded from processing
                     continue
+                db_name = None  # initialize parameters
+                collection_name = None
 
                 if name.endswith('.xlsx') and '商品素材_' in name:
                     shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
@@ -681,7 +818,7 @@ class DataClean:
 
                 # pass the data into self.datas, pending upload to the database
                 if not db_name or not collection_name:
-                    print(f'db_name/collection_name must not be empty')
+                    # print(f'db_name/collection_name must not be empty')
                     continue
                 self.datas.append(
                     {
@@ -718,6 +855,70 @@ class DataClean:
             os.remove(old_file)  # remove it if it already exists
         shutil.move(os.path.join(path, _name), t2)  # move the file from the download folder to the target location
 
+    def move_sycm(self, path=None, is_except=[]):
+        """ Sycm (生意参谋) """
+        if not path:
+            path = self.path
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                # print(name)
+                is_continue = False
+                if is_except:
+                    for item in is_except:
+                        # print(item, f'-----', os.path.join(root, name))
+                        if item in os.path.join(root, name):
+                            # print(name)
+                            is_continue = True
+                            break
+                if is_continue:  # files or folders excluded from processing
+                    continue
+
+                # print(is_except, is_continue)
+                def bib(paths, _as_month=None):
+                    """ closure """
+                    self.move_files(path=path, _name=name, target_path=paths, _as_month=_as_month)
+
+                if 'py_xg' not in name:  # skip non-target files
+                    continue
+
+                if name.endswith('.csv') and '商品排行_万里马官方旗舰店' in name:
+                    t_path = os.path.join(self.source_path, '天猫_生意参谋', '商品排行')
+                    bib(t_path, _as_month=True)
+                elif name.endswith('.csv') and '店铺来源_来源构成_万里马官方旗舰店' in name:
+                    t_path = os.path.join(self.source_path, '天猫_生意参谋', '店铺流量来源')
+                    bib(t_path, _as_month=True)
+
+    def move_dmp(self, path=None, is_except=[]):
+        """ DMP (达摩盘) """
+        if not path:
+            path = self.path
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                # print(name)
+                is_continue = False
+                if is_except:
+                    for item in is_except:
+                        # print(item, f'-----', os.path.join(root, name))
+                        if item in os.path.join(root, name):
+                            # print(name)
+                            is_continue = True
+                            break
+                if is_continue:  # files or folders excluded from processing
+                    continue
+
+                # print(is_except, is_continue)
+                def bib(paths, _as_month=None):
+                    """ closure """
+                    self.move_files(path=path, _name=name, target_path=paths, _as_month=_as_month)
+
+                if 'py_xg' not in name:  # skip non-target files
+                    continue
+
+                if name.endswith('.csv') and '人群属性_万里马官方旗舰店' in name:
+                    t_path = os.path.join(self.source_path, '天猫_达摩盘', '我的人群属性')
+                    bib(t_path, _as_month=True)
+
+
     # @try_except
     def move_sjy(self, path=None, is_except=[]):
         if not path:
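
Note: bib is a small closure redefined on each file iteration; it captures the current name and path from the enclosing scope and forwards them to move_files, so each call site only passes the target directory. A standalone illustration of the pattern (move_files is a hypothetical stand-in):

    def move_files(path, _name, target_path, _as_month=None):
        print(f'would move {_name} from {path} to {target_path}')  # stand-in

    path = 'downloads'
    for name in ['py_xg_商品排行_万里马官方旗舰店_2024-01.csv']:
        def bib(paths, _as_month=None):
            # captures the current `name` and `path` from the enclosing scope
            move_files(path=path, _name=name, target_path=paths, _as_month=_as_month)
        bib('原始文件3/天猫_生意参谋/商品排行', _as_month=True)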
@@ -1142,8 +1343,10 @@ class DataClean:
     df_to_json.as_json_file()  # write a json file that includes the dtypes info of the data
 
 
-def main(service_databases=None):
-    # data classification
+def main(service_databases=None, is_mysql=False):
+    """
+    is_mysql: set it when debugging; False skips the data aggregation that follows
+    """
 
     if not service_databases:
         service_databases = [
@@ -1153,23 +1356,30 @@ def main(service_databases=None):
         # {'nas': 'mysql'},
         ]
 
-    c = DataClean(
+    cn = DataClean(
         path=upload_path,  # source-file directory, the download folder
         source_path=source_path3,  # directory where the original files are kept
         service_databases=service_databases
     )
-    c.new_unzip(is_move=True)  # unzip files; is_move: whether to delete the original zip after extraction
-    c.tg_reports(is_except=['except'])  # promotion reports, cleaned jointly for Tmall and Taobao
-    c.syj_reports_tm(is_except=['except'])  # Tmall Syj (生意经)
-    # c.syj_reports_tb(is_except=['except'])  # Taobao Syj; must not run at the same time as Tmall
-    c.jd_reports(is_except=['except'])  # clean JD reports
-    c.sp_scene_clean(is_except=['except'])  # product material
-    c.upload_df(service_databases=service_databases)  # upload to the database
-
-    c.move_sjy(is_except=['临时文件',])  # Syj, move files to the original-file folder
-    c.move_jd(is_except=['临时文件', ])  # JD, move files to the original-file folder
-    c.move_tg_tm(is_except=['临时文件', ])  # Tmall, move files to the original-file folder
-    c.move_tg_tb(is_except=['临时文件', ])  # Taobao shop, move files to the original-file folder
+    cn.new_unzip(is_move=True)  # unzip files; is_move: whether to delete the original zip after extraction
+    cn.sycm_tm(is_except=['except'])  # Tmall Sycm (生意参谋)
+    cn.dmp_tm(is_except=['except'])  # DMP (达摩盘)
+    cn.tg_reports(is_except=['except'])  # promotion reports, cleaned jointly for Tmall and Taobao
+    cn.syj_reports_tm(is_except=['except'])  # Tmall Syj (生意经)
+    # cn.syj_reports_tb(is_except=['except'])  # Taobao Syj; must not run at the same time as Tmall
+    cn.jd_reports(is_except=['except'])  # clean JD reports
+    cn.sp_scene_clean(is_except=['except'])  # product material
+    cn.upload_df(service_databases=service_databases)  # upload to the database
+
+    cn.move_sycm(is_except=['临时文件', ])  # Sycm, move files to the original-file folder
+    cn.move_dmp(is_except=['临时文件', ])  # DMP
+    cn.move_sjy(is_except=['临时文件',])  # Syj, move files to the original-file folder
+    cn.move_jd(is_except=['临时文件', ])  # JD, move files to the original-file folder
+    cn.move_tg_tm(is_except=['临时文件', ])  # Tmall, move files to the original-file folder
+    cn.move_tg_tb(is_except=['临时文件', ])  # Taobao shop, move files to the original-file folder
+
+    if not is_mysql:
+        return
 
     # update the product-year baseline table; attribute setting 2 - product year baseline
     p = products.Products()
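
Note: with the new flag, main() now returns after cleaning, uploading, and moving files unless is_mysql is truthy; the baseline update, optimization, and aggregation below only run when it is set. An assumed invocation:

    main(
        service_databases=[{'company': 'mysql'}],
        is_mysql=False,  # clean + upload + move only; skip the downstream aggregation
    )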
@@ -1196,6 +1406,7 @@ def main(service_databases=None):
         days=100,
         is_mongo=True,
         is_mysql=True,
+        service_databases=service_databases
     )
 
     # data aggregation
@@ -1214,7 +1425,7 @@ def main(service_databases=None):
 
 def test():
     # main_key = '单元报表'
-    path = f'/Users/xigua/数据中心/原始文件2/京东报表/JD流量来源13414124124'
+    path = f'/Users/xigua/数据中心/原始文件2/生意参谋/商品排行qweqeqwe'
     for root, dirs, files in os.walk(path, topdown=False):
         for name in files:
             if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
@@ -1227,30 +1438,34 @@ def test():
             if name.endswith('.csv'):
                 print(name)
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                # if '店铺名称' not in df.columns.tolist():
-                #     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
-                for item in df.columns.tolist():
-                    if '同比' in item or '环比' in item or '时间' in item:
-                        df.pop(item)
-                date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_\d{4}-\d{2}-\d{2}', name)[0]
+                if '店铺名称' not in df.columns.tolist():
+                    df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
+                df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
+                df.replace(to_replace=[','], value='', regex=True, inplace=True)
+                if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
+                    df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
+                # shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
+                # df.insert(loc=1, column='店铺名称', value=shop_name)
+
+                date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)[0]
 
                 date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
 
-                new_path = f'/Users/xigua/数据中心/原始文件3/京东报表/店铺来源_三级来源/{date}'
+                new_path = f'/Users/xigua/数据中心/原始文件3/天猫_生意参谋/商品排行/{date}'
                 # new_path = os.path.join(new_path, date)  # add year-month classification
                 if not os.path.exists(new_path):
                     os.makedirs(new_path, exist_ok=True)
                 # print(date_all)
 
-                new_name = f'py_xg_京东商智_店铺来源_三级来源_{date_all}.xlsx'
+                new_name = f'py_xg_商品排行_万里马官方旗舰店_{date_all}.csv'
                 # print(os.path.join(new_path, new_name))
                 # breakpoint()
-                # df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
-                try:
-                    df.to_excel(os.path.join(new_path, new_name),
-                                index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
-                except Exception as e:
-                    print(e)
+                df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
+                # try:
+                #     df.to_excel(os.path.join(new_path, new_name),
+                #                 index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                # except Exception as e:
+                #     print(e)
 
 
 
@@ -1258,11 +1473,12 @@ def test():
 
 if __name__ == '__main__':
     main(
        service_databases = [
-            # {'company': 'mysql'},
-            {'home_lx': 'mysql'},
+            {'company': 'mysql'},
+            # {'home_lx': 'mysql'},
             # {'home_lx': 'mongodb'},
             # {'nas': 'mysql'},
-        ]
+        ],
+        is_mysql = False,
     )
 
     # c = DataClean(
--- mdbq-2.5.7/mdbq/dataframe/converter.py
+++ mdbq-2.5.9/mdbq/dataframe/converter.py
@@ -47,7 +47,7 @@ class DataFrameConverter(object):
             try:
                 # percent signs are incompatible with some databases, so convert percentages to decimals; columns to convert must not contain Chinese or special characters
                 df[col] = df[col].apply(
-                    lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+%', str(x)) else x)
+                    lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%', str(x)) else x)
             except Exception as e:
                 print(f'watch this error: at column -> {col} -> {e}')
@@ -93,6 +93,6 @@ if __name__ == '__main__':
     # df = converter.convert_df_cols(df)
     # print(df['a'].dtype)
     # print(df)
-    pattern = 'dfa_dfawr__'
-    pattern = re.sub(r'_+$', '', pattern)
+    pattern = '1540%'
+    pattern = re.findall(r'^\d+\.?\d*%', pattern)
     print(pattern)
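
Note: the widened pattern also accepts decimal percentages, which the old one silently skipped. A standalone comparison:

    import re

    old, new = r'^\d+%', r'^\d+\.?\d*%'
    for s in ['15%', '15.4%', '1540%']:
        print(s, bool(re.findall(old, s)), bool(re.findall(new, s)))
    # 15%   True  True
    # 15.4% False True  -- the old pattern missed decimal percentages
    # 1540% True  True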
--- mdbq-2.5.7/mdbq/mysql/mysql.py
+++ mdbq-2.5.9/mdbq/mysql/mysql.py
@@ -68,7 +68,7 @@ class MysqlUpload:
 
         return wrapper
 
-    @try_except
+    # @try_except
     def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
         """
         Write the df into the database
@@ -403,6 +403,8 @@ class MysqlUpload:
             return 'INT'
         elif dtype == 'float64':
             res = find_longest_decimal_value(df[col].tolist())  # take the value with the longest decimal part
+            if 'e' in str(res):
+                res = round(float(res), 4)
             int_step = len(str(res).split('.')[0])  # length of the integer part
             f_step = len(str(res).split('.')[1])  # length of the decimal part
 
@@ -415,17 +417,17 @@ class MysqlUpload:
             elif int_step >= 4 and f_step >= 0:
                 return 'decimal(10, 2)'
             elif int_step >= 2 and f_step >= 6:
-                return 'decimal(12, 6)'
+                return 'decimal(12, 4)'
             elif int_step >= 2 and f_step > 4:
-                return 'decimal(10, 6)'
+                return 'decimal(12, 4)'
             elif int_step >= 2 and f_step > 2:
                 return 'decimal(10, 4)'
             elif int_step >= 2 and f_step >= 0:
                 return 'decimal(10, 2)'
             elif int_step >= 1 and f_step >= 6:
-                return 'decimal(10, 6)'
+                return 'decimal(12, 4)'
             elif int_step >= 1 and f_step > 4:
-                return 'decimal(10, 6)'
+                return 'decimal(12, 4)'
             elif int_step >= 1 and f_step > 2:
                 return 'decimal(10, 4)'
             else:
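
Note: two things change in the float64 column-width logic. First, the new 'e' guard matters because Python renders very small or very large floats in scientific notation, which breaks the split('.') arithmetic. Second, long decimal parts now map to decimal(12, 4) instead of decimal(10, 6)/decimal(12, 6): more integer headroom, fewer decimal places. A standalone illustration of both (values are hypothetical):

    res = 0.000001234
    print(str(res))  # '1.234e-06' -- scientific notation
    # without the guard, f_step would be len('234e-06') == 7, a bogus width,
    # and str(1e-05).split('.') has no '.' at all, so indexing [1] raises IndexError
    if 'e' in str(res):
        res = round(float(res), 4)  # -> 0.0, which splits cleanly

    res = 12.345678
    int_step = len(str(res).split('.')[0])  # 2
    f_step = len(str(res).split('.')[1])    # 6
    # int_step >= 2 and f_step >= 6 -> 'decimal(12, 4)' in 2.5.9 (was 'decimal(12, 6)' in 2.5.7)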
--- mdbq-2.5.7/mdbq/spider/aikucun.py
+++ mdbq-2.5.9/mdbq/spider/aikucun.py
@@ -8,6 +8,7 @@ import platform
 import re
 import time
 import warnings
+import requests
 import pandas as pd
 from selenium import webdriver
 from selenium.webdriver.support.wait import WebDriverWait
@@ -18,6 +19,7 @@ from mdbq.config import set_support
 from selenium.webdriver.common.keys import Keys
 from mdbq.aggregation import aggregation
 from mdbq.clean import data_clean
+from mdbq.other import ua_sj
 
 warnings.filterwarnings('ignore')
 
@@ -41,6 +43,7 @@ else:
     D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
     Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表'))  # root of the shared folder
     Source_Path = str(pathlib.Path(Data_Path, '原始文件2'))
+upload_path = os.path.join(D_PATH, '数据上传中心')  # this directory lives inside the download folder
 
 
 def get_cookie_aikucun():
  def get_cookie_aikucun():
@@ -84,6 +87,7 @@ def get_cookie_aikucun():
84
87
  _driver.get(_url)
85
88
  time.sleep(0.1)
86
89
  _driver.maximize_window() # 窗口最大化 方便后续加载数据
90
+ print(f'请登录并切换到百宝箱,再保存 cookies: \n https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
87
91
  breakpoint()
88
92
 
89
93
  d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -122,7 +126,7 @@ class AikuCun:
 
     def login(self, shop_name='aikucun'):
         option = webdriver.ChromeOptions()
-        # option.add_argument("--headless")  # headless mode
+        option.add_argument("--headless")  # headless mode
         # tweak the chrome launch configuration
         option.add_argument("--disable-gpu")
         option.add_argument("--no-sandbox")
@@ -154,6 +158,9 @@ class AikuCun:
         option.add_experimental_option('prefs', prefs)
         option.add_experimental_option('excludeSwitches', ['enable-automation'])  # experimental option; hides the automation notice in the corner
 
+        # change the default download directory
+        option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
+
         # # disable the default startup path via the excludeSwitches parameter
         # option.add_experimental_option('excludeSwitches', ['enable-automation'])
 
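
Note for reviewers: ChromeOptions keeps experimental options in a dict keyed by name, so this second 'prefs' call replaces whatever the earlier option.add_experimental_option('prefs', prefs) configured. A hedged sketch of the usual fix, merging everything into one prefs dict (values are illustrative):

    from selenium import webdriver

    upload_path = '/tmp/数据上传中心'  # stand-in for the module-level upload_path
    option = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0}  # hypothetical earlier prefs
    prefs['download.default_directory'] = upload_path
    option.add_experimental_option('prefs', prefs)  # a single call carries all preferences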
@@ -238,7 +245,7 @@ class AikuCun:
         elements = _driver.find_elements(
             By.XPATH, '//button/span[contains(text(), "查询")]')
         _driver.execute_script("arguments[0].click();", elements[0])  # click
-        time.sleep(3)
+        time.sleep(5)
         wait.until(EC.presence_of_element_located(
             (By.XPATH,
              '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')))
@@ -246,12 +253,12 @@ class AikuCun:
         elements = _driver.find_elements(
             By.XPATH,
             '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
         _driver.execute_script("arguments[0].click();", elements[0])  # click
-        time.sleep(3)
+        time.sleep(5)
         self.clean_data(date=new_date)
         _driver.quit()
 
     def clean_data(self, date):
-        for root, dirs, files in os.walk(D_PATH, topdown=False):
+        for root, dirs, files in os.walk(upload_path, topdown=False):
             for name in files:
                 if '~$' in name or 'DS_Store' in name:
                     continue
@@ -280,11 +287,11 @@ class AikuCun:
 
 def akucun():
     akc = AikuCun()
-    akc.get_data(shop_name='aikucun', date_num=7)
+    akc.get_data(shop_name='aikucun', date_num=10)  # fetch the most recent N days of data; 0 means today
     # akc.clean_data()
 
     # new-style data classification
-    dp = aggregation.DatabaseUpdate(path=D_PATH)
+    dp = aggregation.DatabaseUpdate(path=upload_path)
     dp.new_unzip(is_move=True)
     dp.cleaning(is_move=False, is_except=['临时文件'])  # clean the data into self.datas; no need to remove files immediately, keep them among the originals
     # update self.datas into the database
@@ -295,7 +302,7 @@ def akucun():
         # {'nas': 'mysql'},
     ])
     # data classification
-    c = data_clean.DataClean(path=D_PATH, source_path=Source_Path)
+    c = data_clean.DataClean(path=upload_path, source_path=Source_Path)
     c.set_up_to_mogo = False  # no longer update the database via data_clean; replaced by aggregation.py
     c.set_up_to_mysql = False  # no longer update the database via data_clean; replaced by aggregation.py
     c.new_unzip(is_move=True, )  # unzip files
@@ -303,7 +310,57 @@ def akucun():
     c.move_all(is_except=['临时文件'])  # move files to the original-file folder
 
 
+class AikuCunNew:
+
+    def __init__(self, shop_name,):
+        self.shop_name = shop_name
+        self.today = datetime.date.today()
+        self.headers = {'User-Agent': ua_sj.get_ua()}
+        self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
+        self.cookies = {}
+        self.get_cookies()  # refresh the value of self.cookies
+        self.support_path = set_support.SetSupport(dirname='support').dirname
+        self.start_date = (self.today - datetime.timedelta(days=15)).strftime('%Y-%m-%d')
+        self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+
+    def akc(self):
+        """
+        """
+        start_date = self.start_date
+        end_date = self.end_date
+        url = 'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/list/down?'
+        self.headers.update({'Referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'})
+        now = datetime.datetime.now()
+        timestamp_ms = round(time.mktime(now.timetuple()) * 1000 + now.microsecond / 1000)
+        data = {
+            'time': timestamp_ms,
+            'sign': '2DA6A7580C859B374AE830CAD78BB84B'
+        }
+        res = requests.post(
+            url,
+            headers=self.headers,
+            cookies=self.cookies,
+            params=data
+        )
+        print(res.text)
+
+    def get_cookies(self):
+        files = os.listdir(self.cookie_path)
+        for file in files:
+            if self.shop_name in file and '~' not in file:
+                with open(os.path.join(self.cookie_path, file), 'r') as f:
+                    cookies_data = json.load(f)
+                break
+        for data in cookies_data:
+            self.cookies.update({data['name']: data['value']})
+
+
 if __name__ == '__main__':
     pass
     # get_cookie_aikucun()
     akucun()
+
+    # a = AikuCunNew(shop_name='aikucun')
+    # a.akc()
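
Note: the request timestamp is wall-clock milliseconds, while the sign value is hard-coded rather than computed, so the endpoint presumably only honours the request while that signature stays valid. The timestamp construction is equivalent to the simpler time.time() form:

    import datetime
    import time

    now = datetime.datetime.now()
    ms_via_mktime = round(time.mktime(now.timetuple()) * 1000 + now.microsecond / 1000)
    ms_via_time = round(time.time() * 1000)  # simpler equivalent
    print(ms_via_mktime, ms_via_time)  # agree up to execution jitter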
--- mdbq-2.5.7/mdbq.egg-info/PKG-INFO
+++ mdbq-2.5.9/mdbq.egg-info/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.5.7
-Home-page: https://pypi.org/project/mdbq
+Version: 2.5.9
+Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
 License: MIT
--- mdbq-2.5.7/setup.py
+++ mdbq-2.5.9/setup.py
@@ -3,10 +3,10 @@
 from setuptools import setup, find_packages
 
 setup(name='mdbq',
-      version='2.5.7',
+      version='2.5.9',
       author='xigua, ',
       author_email="2587125111@qq.com",
-      url='https://pypi.org/project/mdbq',
+      url='https://pypi.org/project/mdbsql',
       long_description='''
      世界上最庄严的问题:我能做什么好事?
      ''',