mdbq 2.7.0__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  # -*- coding:utf-8 -*-
2
2
  import warnings
3
+ from unittest.mock import inplace
4
+
3
5
  import pandas as pd
4
6
  import numpy as np
5
7
  import chardet
@@ -1118,7 +1120,7 @@ class DatabaseUpdate:
1118
1120
  def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, target_service='company'):
1119
1121
  """ 上传一个文件夹到 mysql 或者 mongodb 数据库 """
1120
1122
  if not os.path.isdir(path):
1121
- print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 路径不存在或错误: {path}')
1123
+ print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 函数只接受文件夹路径,不是一个文件夹: {path}')
1122
1124
  return
1123
1125
 
1124
1126
  if dbs['mongodb']:
@@ -1288,29 +1290,31 @@ def file_dir(one_file=True, target_service='company'):
1288
1290
 
1289
1291
 
def test(path=None):
    """
    Normalise the JD full-site marketing CSV exports under ``path`` in place.

    The directory tree is walked bottom-up.  Every ``.csv`` file whose name
    contains neither ``baidu`` nor ``~`` is handled as follows:

    * a file without any data rows (header only, or zero bytes) is deleted;
    * otherwise a constant ``店铺名称`` column is inserted at position 1 when
      it is missing, a ``全站roi`` column is renamed to ``全站投产比``, and the
      file is rewritten with the same encoding.

    :param path: directory to process.  ``None`` (the default) selects the
        hard-coded export folder the original script used.
    :return: None
    """
    if path is None:
        # os.walk accepts absolute paths directly; going through
        # os.path.relpath could raise ValueError on Windows when the current
        # working directory is on another drive.
        path = r'C:\同步空间\BaiduSyncdisk\原始文件3\京东报表\京准通_全sdfsdf站营销'

    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            if not name.endswith('.csv') or 'baidu' in name or '~' in name:
                continue
            file_path = os.path.join(root, name)
            try:
                df = pd.read_csv(file_path, encoding='utf-8_sig', header=0, na_filter=False)
            except pd.errors.EmptyDataError:
                # A zero-byte file has no header to parse; treat it like any
                # other file without data rows.
                df = pd.DataFrame()
            print(name, len(df))
            if len(df) == 0:
                print(name)
                os.remove(file_path)
                continue
            # Snapshot the header before inserting so that the rename check
            # below looks at the columns as they were read from disk.
            cols = df.columns.tolist()
            if '店铺名称' not in cols:
                df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
            if '全站roi' in cols:
                df.rename(columns={'全站roi': '全站投产比'}, inplace=True)
            df.to_csv(file_path, encoding='utf-8_sig', index=False, header=True)
1301
1316
 
1302
1317
 
1303
- def test2():
1304
- dp = DatabaseUpdate(path='/Users/xigua/Downloads')
1305
- dp.new_unzip(is_move=True)
1306
- dp.cleaning(is_move=False, ) # 清洗数据, 存入 self.datas
1307
- dp.upload_df(service_databases=[
1308
- # {'home_lx': 'mongodb'},
1309
- {'company': 'mysql'},
1310
- # {'nas': 'mysql'}
1311
- ], path=None, service_name=None)
1312
-
1313
-
1314
1318
  if __name__ == '__main__':
1315
1319
  username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
1316
1320
  print(username, password, host, port)
@@ -1318,32 +1322,22 @@ if __name__ == '__main__':
1318
1322
 
1319
1323
  # # 上传 1 个文件到数据库
1320
1324
  # one_file_to_mysql(
1321
- # file='/Users/xigua/Downloads/万里马箱包推广1_营销概况_qwqw全站营销_2024-08-18_2024-09-01.csv',
1322
- # db_name='京东数据3',
1323
- # table_name='推广数据_全站营销',
1324
- # target_service='company',
1325
+ # file=r'C:\同步空间\BaiduSyncdisk\原始文件2\属性设置\电商定价.csv',
1326
+ # db_name='属性设置3',
1327
+ # table_name='电商定价',
1328
+ # target_service='home_lx',
1325
1329
  # database='mysql'
1326
1330
  # )
1327
1331
 
1328
1332
  # 上传一个目录到指定数据库
1329
1333
  db_name = '推广数据2'
1330
- table_name = '营销场景报表'
1334
+ table_name = '品销宝'
1331
1335
  upload_dir(
1332
- path='/Users/xigua/数据中心/原始文件3/天猫推广报表/营销场景报表',
1336
+ path=os.path.relpath(r'C:\同步空间\BaiduSyncdisk\原始文件3\天猫推广报表\品销宝'),
1333
1337
  db_name=db_name,
1334
1338
  collection_name=table_name,
1335
1339
  dbs={'mysql': True, 'mongodb': False},
1336
- target_service='company',
1340
+ target_service='home_lx',
1337
1341
  )
1338
1342
 
1339
-
1340
- # # 新版 数据分类
1341
- # dp = DatabaseUpdate(path='/Users/xigua/Downloads')
1342
- # dp.new_unzip(is_move=True)
1343
- # dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
1344
- # # 将 self.datas 更新至数据库
1345
- # # dp.upload_df(service_databases=[
1346
- # # # {'home_lx': 'mongodb'},
1347
- # # {'company': 'mysql'},
1348
- # # # {'nas': 'mysql'},
1349
- # # ])
1343
+ # test()
@@ -1,28 +1,20 @@
1
1
  # -*- coding:utf-8 -*-
2
2
  import warnings
3
3
  import pandas as pd
4
- import numpy as np
5
- import chardet
6
- import zipfile
7
-
8
- from numpy import dtype
9
- from pandas.tseries.holiday import next_monday
10
- from pyzipper import PyZipFile
11
4
  import os
12
5
  import platform
13
6
  import json
14
7
  import pymysql
8
+ import socket
15
9
  from mdbq.mongo import mongo
16
10
  from mdbq.mysql import mysql
17
11
  from mdbq.mysql import s_query
18
- from mdbq.config import get_myconf
12
+ from mdbq.config import myconfig
19
13
  from mdbq.config import set_support
20
14
  from mdbq.dataframe import converter
21
15
  import datetime
22
16
  import time
23
17
  import re
24
- import shutil
25
- import getpass
26
18
 
27
19
  from sqlalchemy.dialects.postgresql.pg_catalog import pg_get_serial_sequence
28
20
 
@@ -54,7 +46,7 @@ class DataTypes:
54
46
  self.path = set_support.SetSupport(dirname='support').dirname
55
47
  self.service_name = service_name
56
48
  if not self.service_name:
57
- self.service_name = 'home_lx'
49
+ self.service_name = 'xigua_lx'
58
50
  self.json_file = os.path.join(self.path, f'mysql_types_{self.service_name}.json')
59
51
  if not os.path.isdir(self.path):
60
52
  os.makedirs(self.path)
@@ -154,88 +146,89 @@ class DataTypes:
154
146
  return {}, cl, None, None # 返回这些结果的目的是等添加完列再写 json 文件才能读到 types 信息
155
147
 
156
148
 
def mysql_all_dtypes(db_name=None, table_name=None, path=None):
    """
    Refresh the local JSON cache of MySQL column types.

    The MySQL credentials are chosen from ``myconfig.main()`` according to
    this machine's host name.  Every non-system database is then listed, its
    tables are enumerated, and the column types of each selected table are
    merged into a ``DataTypes`` instance, which is finally written to disk
    with ``as_json_file()``.

    :param db_name: restrict the refresh to this database; when falsy, every
        database is processed and ``table_name`` is ignored.
    :param table_name: together with ``db_name``, restrict the refresh to this
        single table.
    :param path: forwarded unchanged to ``DataTypes(path=...)``.
    :return: None.  Returns early, doing nothing, on an unrecognised host or
        when the configuration yields no user name.
    """
    # The host name is lower-cased, so every candidate must be lower-case too
    # (the former 'Mac2.local' entry could never match).
    hostname = socket.gethostname().lower()
    if hostname in ('xigua_lx', 'xigua1', 'macbook pro'):
        service_name = 'xigua_lx'  # determines the file name mysql_types_xigua_lx.json
    elif hostname in ('company', 'mac2.local'):
        service_name = 'company'  # determines the file name mysql_types_company.json
    else:
        return

    # Load the configuration only once the host is known to be supported.
    data = myconfig.main()['Windows'][service_name]['mysql']['local']
    username, password, host, port = data['username'], data['password'], data['host'], data['port']
    if not username:
        return

    config = {
        'host': host,
        'port': int(port),
        'user': username,
        'password': password,
        'charset': 'utf8mb4',  # utf8mb4 can store four-byte UTF-8 characters
        'cursorclass': pymysql.cursors.DictCursor,
    }

    connection = pymysql.connect(**config)
    try:
        with connection.cursor() as cursor:
            cursor.execute("SHOW DATABASES;")
            db_name_lists = [item['Database'] for item in cursor.fetchall()]
    finally:
        # Close the connection even when the query raises.
        connection.close()

    sys_lists = ['information_schema', 'mysql', 'performance_schema', 'sakila', 'sys']
    db_name_lists = [item for item in db_name_lists if item not in sys_lists]

    # (database, table) pairs, e.g. [('云电影', '电影更新'), ('生意经2', 'e3_零售明细统计')]
    tables = []
    for db_ in db_name_lists:
        config.update({'database': db_})
        connection = pymysql.connect(**config)
        try:
            with connection.cursor() as cursor:
                cursor.execute("SHOW TABLES;")
                # Each row is a one-entry dict whose value is the table name.
                for res_table in cursor.fetchall():
                    tables.extend((db_, v) for v in res_table.values())
        except pymysql.MySQLError as e:
            # Best effort: skip a database that cannot be listed, but say so
            # instead of silently swallowing every exception.
            print(f'读取数据库 < {db_} > 的数据表失败, 已跳过: {e}')
        finally:
            connection.close()
        time.sleep(0.5)

    d = DataTypes(path=path, service_name=service_name)
    for db_n, table_n in tables:
        if db_name:
            if db_name != db_n:
                continue
            if table_name and table_name != table_n:
                continue
        # With no db_name given, every table of every database is processed.
        print(f'获取列信息 数据库: < {db_n} >, 数据表: < {table_n} >')
        sq = s_query.QueryDatas(username=username, password=password, host=host, port=port)
        # Rows are read below through their COLUMN_NAME / COLUMN_TYPE keys.
        name_type = sq.dtypes_to_list(db_name=db_n, table_name=table_n)
        if not name_type:
            print(f'数据库回传数据(name_type)为空')
            continue
        dtypes = {item['COLUMN_NAME']: item['COLUMN_TYPE'] for item in name_type}
        d.get_mysql_types(
            dtypes={'mysql': {db_n: {table_n: dtypes}}},
            cl='mysql',
            db_name=db_n,
            table_name=table_n,
            is_file_dtype=True,  # NOTE(review): original note suggests the existing file wins - confirm
        )
    d.as_json_file()
241
234
 
@@ -243,5 +236,5 @@ def mysql_all_dtypes(db_name=None, table_name=None, service_database={'home_lx':
if __name__ == '__main__':
    # Refresh the dtypes of every MySQL database into the local JSON file.
    mysql_all_dtypes(path='/Users/xigua/Downloads')
@@ -1,7 +1,7 @@
1
1
  # -*- coding: UTF-8 -*-
2
2
  from mdbq.mongo import mongo
3
3
  from mdbq.mysql import mysql
4
- from mdbq.config import get_myconf
4
+ from mdbq.config import myconfig
5
5
  import socket
6
6
  import subprocess
7
7
  import psutil
@@ -10,6 +10,19 @@ import platform
10
10
  """
11
11
  对指定数据库所有冗余数据进行清理
12
12
  """
# Select the local MySQL credentials by this machine's host name.
# Everything starts as None so that an unrecognised host reaches the message
# below instead of raising NameError on the first use of `username`.
username = password = host = port = None
service_database = None
# The host name is lower-cased, so every candidate must be lower-case too
# (the former 'Mac2.local' entry could never match).
_hostname = socket.gethostname().lower()
if _hostname in ('xigua_lx', 'xigua1', 'macbook pro'):
    conf = myconfig.main()
    data = conf['Windows']['xigua_lx']['mysql']['local']
    username, password, host, port = data['username'], data['password'], data['host'], data['port']
    service_database = {'xigua_lx': 'mysql'}
elif _hostname in ('company', 'mac2.local'):
    conf = myconfig.main()
    data = conf['Windows']['company']['mysql']['local']
    username, password, host, port = data['username'], data['password'], data['host'], data['port']
    service_database = {'company': 'mysql'}
if not username:
    # Include the host name so the message says which machine is unknown.
    print(f'找不到主机: {_hostname}')
25
+
13
26
 
14
27
 
15
28
  def restart_mongodb():
@@ -57,60 +70,25 @@ def restart_mongodb():
57
70
  subprocess.call(command, shell=True)
58
71
 
59
72
 
def op_data(db_name_lists, days: int = 63, is_mongo=True, is_mysql=True):
    """
    Run ``mysql.OptimizeDatas.optimize_list()`` for the given databases.

    The clean-up only runs on the known hosts (xigua_lx, xigua1, mac2.local,
    company) and uses the module-level ``username`` / ``password`` / ``host``
    / ``port`` credentials.  On any other host the call is a no-op.

    :param db_name_lists: database names, assigned to ``db_name_lists`` on the
        optimizer.
    :param days: assigned to ``days`` on the optimizer.
    :param is_mongo: accepted for backward compatibility; not read here.
    :param is_mysql: when falsy, nothing is done.
    :return: None
    """
    if not is_mysql:
        return
    # Compare lower-cased, as the credential selection at module level does,
    # and query the host name once instead of once per comparison.
    if socket.gethostname().lower() not in ('xigua_lx', 'xigua1', 'mac2.local', 'company'):
        return

    # Both former host branches ran exactly these statements.
    s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
    s.db_name_lists = db_name_lists
    s.days = days
    s.optimize_list()
113
91
 
114
92
 
if __name__ == '__main__':
    # Clean up the aggregated-data database with a 10-day setting.
    op_data(
        db_name_lists=['聚合数据'],
        days=10,
        is_mongo=True,
        is_mysql=True,
    )