mdbq 1.3.2.tar.gz → 1.3.3.tar.gz
This diff compares the contents of package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- {mdbq-1.3.2 → mdbq-1.3.3}/PKG-INFO +1 -1
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/aggregation/aggregation.py +15 -14
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/aggregation/mysql_types.py +8 -10
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/mysql/mysql.py +30 -33
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.3.2 → mdbq-1.3.3}/setup.py +1 -1
- {mdbq-1.3.2 → mdbq-1.3.3}/README.txt +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/__version__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/clean/data_clean.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/company/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/company/copysh.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/config/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/config/get_myconf.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/config/products.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/config/set_support.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/dataframe/converter.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/log/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/mysql/s_query.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/other/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/other/porxy.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.3.2 → mdbq-1.3.3}/setup.cfg +0 -0
```diff
--- mdbq-1.3.2/mdbq/aggregation/aggregation.py
+++ mdbq-1.3.3/mdbq/aggregation/aggregation.py
@@ -599,11 +599,11 @@ class DatabaseUpdate:
                 }
             )
 
-    def upload_df(self, service_databases=[{}]):
+    def upload_df(self, service_databases=[{}], path=None, system_name=None):
         """
         将清洗后的 df 上传数据库
         """
-        df_to_json = df_types.DataTypes()  # json 文件, 包含数据的 dtypes 信息
+        df_to_json = df_types.DataTypes(path=path, system_name=system_name)  # json 文件, 包含数据的 dtypes 信息
         for service_database in service_databases:
             for service_name, database in service_database.items():
                 # print(service_name, database)
```
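The change above lets callers control where the dtypes JSON is read from and which system profile it belongs to. A hedged usage sketch follows; the bare `DatabaseUpdate()` construction is an assumption, since the class's `__init__` does not appear in this diff:

```python
# Hedged usage sketch; DatabaseUpdate() construction is assumed (its __init__ is not in this diff).
from mdbq.aggregation.aggregation import DatabaseUpdate

d = DatabaseUpdate()
d.upload_df(
    service_databases=[{'home_lx': 'mysql'}],  # one {service_name: database_type} dict per target
    path=None,         # None preserves 1.3.2 behavior: DataTypes falls back to its default directory
    system_name=None,  # None preserves 1.3.2 behavior: DataTypes falls back to its default system name
)
```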
```diff
@@ -803,7 +803,7 @@ class DatabaseUpdate:
         return df
 
 
-def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}):
+def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, system_name=None):
     """ 上传一个文件夹到 mysql 或者 mongodb 数据库 """
     if not os.path.isdir(path):
         print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 函数只接受文件夹路径,不是一个文件夹: {path}')
```
```diff
@@ -845,7 +845,7 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}):
             )
 
     # 从本地 json 文件从读取 df 的数据类型信息
-    df_to_json = df_types.DataTypes()
+    df_to_json = df_types.DataTypes(path=json_path, system_name=system_name)
     dtypes = df_to_json.load_dtypes(
         db_name=db_name,
         collection_name=collection_name,
```
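`upload_dir` gains the same pair of knobs, here named `json_path` so it does not clash with the existing `path` argument (the folder being uploaded). A hedged call sketch with illustrative values:

```python
# Hedged call sketch; the folder path and names below are illustrative placeholders.
from mdbq.aggregation.aggregation import upload_dir

upload_dir(
    path='/data/clean_files',               # folder whose files get uploaded (placeholder)
    db_name='test',
    collection_name='增量更新测试',
    dbs={'mysql': True, 'mongodb': False},
    json_path=None,                         # new in 1.3.3: custom dtypes-JSON directory
    system_name=None,                       # new in 1.3.3: forwarded to df_types.DataTypes
)
```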
```diff
@@ -976,13 +976,13 @@ def test2():
         # {'home_lx': 'mongodb'},
         {'home_lx': 'mysql'},
         # {'nas': 'mysql'}
-    ])
+    ], path=None, system_name=None)
 
 
 if __name__ == '__main__':
     # username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     # print(username, password, host, port)
-
+    file_dir(one_file=False)
     # one_file_to_mysql(
     #     file='/Users/xigua/数据中心/原始文件2/推广报表/品销宝/账户/账户_明星店铺报表_2023-11-13_2023-12-12.csv',
     #     db_name='推广数据2',
```
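As the test code shows, `service_databases` is a list of single-entry dicts mapping a service name to a database type:

```python
# Shape of the service_databases argument, taken from the test code above.
service_databases = [
    {'home_lx': 'mysql'},
    # {'home_lx': 'mongodb'},  # disabled targets are simply commented out
    # {'nas': 'mysql'},
]
```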
```diff
@@ -1001,11 +1001,12 @@ if __name__ == '__main__':
 
     # test2()
 
-    file = '
-    df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
-    username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
-    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
-    m.df_to_mysql(df=df, db_name='test', table_name='增量更新测试',
-
-
-
+    # file = ''
+    # df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
+    # username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
+    # m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+    # m.df_to_mysql(df=df, db_name='test', table_name='增量更新测试',
+    #               drop_dup=False,
+    #               # icm_update=['日期', '推广费余额'],
+    #               system_name='company',
+    #               )
```
```diff
--- mdbq-1.3.2/mdbq/aggregation/mysql_types.py
+++ mdbq-1.3.3/mdbq/aggregation/mysql_types.py
@@ -38,7 +38,7 @@ class DataTypes:
     数据简介: 记录 dataframe 或者数据库的列信息(dtypes),可以记录其信息或者加载相关信息用于入库使用,
     第一字段为分类(如 dataframe/mysql),第二字段为数据库名,第三字段为集合名,第四段列名及其数据类型
     """
-    def __init__(self):
+    def __init__(self, path=None, system_name=None):
         self.datas = {
             '_json统计':
                 {
```
```diff
@@ -49,8 +49,12 @@ class DataTypes:
                     '数据简介': '记录数据库各表的数据类型信息',
                 }
         }
-        self.path =
-        self.
+        self.path = path
+        if not self.path:
+            self.path = set_support.SetSupport(dirname='support').dirname
+        self.system_name = system_name
+        if not self.system_name:
+            self.system_name = 'home_lx'
         self.json_file = os.path.join(self.path, f'mysql_types_{self.system_name}.json')
         if not os.path.isdir(self.path):
            os.makedirs(self.path)
```
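The constructor now resolves its own defaults: an explicit argument wins, otherwise the support directory and the 'home_lx' profile are used. A minimal, self-contained sketch of the same fallback pattern (`set_support` is replaced by a plain home-directory default for illustration):

```python
import os

class DataTypesSketch:
    """Minimal stand-in for the DataTypes.__init__ fallback logic (illustrative only)."""
    def __init__(self, path=None, system_name=None):
        # An explicit path wins; otherwise fall back to a default support directory.
        self.path = path or os.path.join(os.path.expanduser('~'), 'support')
        # An explicit system_name wins; otherwise fall back to 'home_lx', as in the diff.
        self.system_name = system_name or 'home_lx'
        # One JSON file per system profile, keyed by system_name.
        self.json_file = os.path.join(self.path, f'mysql_types_{self.system_name}.json')
        os.makedirs(self.path, exist_ok=True)

print(DataTypesSketch().json_file)   # e.g. /home/user/support/mysql_types_home_lx.json
print(DataTypesSketch(path='/tmp/support', system_name='company').json_file)
```

Centralizing the fallback in `__init__` is what allows the next two hunks to delete the per-caller `if not path:` and `if system_name:` blocks.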
```diff
@@ -154,8 +158,6 @@ def mysql_all_dtypes(db_name=None, table_name=None, path=None, system_name=None):
     """
     更新笔记本 mysql 中所有数据库的 dtypes 信息到本地 json
     """
-    if not path:
-        path = set_support.SetSupport(dirname='support').dirname
 
     username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
     config = {
```
```diff
@@ -206,10 +208,7 @@ def mysql_all_dtypes(db_name=None, table_name=None, path=None, system_name=None):
     connection.close()
     time.sleep(0.5)
 
-    d = DataTypes()
-    if system_name:
-        d.system_name = system_name  # 影响 json 文件名
-    # d.json_file = os.path.join(path, f'mysql_types.json')  # # json 保存位置
+    d = DataTypes(path=path, system_name=system_name)
     for result in results:
         for db_n, table_n in result.items():
             # print(db_n, table_n, db_name, table_name)
```
```diff
@@ -221,7 +220,6 @@ def mysql_all_dtypes(db_name=None, table_name=None, path=None, system_name=None):
                 continue
             # 如果 db_name 和 table_name 都不指定,则下载所有数据库的所有数据表
             print(f'获取列信息 数据库: < {db_n} >, 数据表: < {table_n} >')
-            # d.mysql_dtypes_to_json(db_name=db_n, table_name=table_n, path=path)
             sq = s_query.QueryDatas(username=username, password=password, host=host, port=port)
             # 获取数据表的指定列, 返回列表
             # [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
```
```diff
--- mdbq-1.3.2/mdbq/mysql/mysql.py
+++ mdbq-1.3.3/mdbq/mysql/mysql.py
@@ -57,7 +57,7 @@ class MysqlUpload:
         }
         self.filename = None
 
-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=True, drop_duplicates=False, filename=None, count=None, system_name=None):
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=True, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
         """
         将 df 写入数据库
         db_name: 数据库名称
```
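A hedged call sketch of the extended signature; credentials, host and the one-row frame are placeholders, not taken from this diff:

```python
# Hedged call sketch; credentials and data are placeholders.
import pandas as pd
from mdbq.mysql import mysql

df = pd.DataFrame({'日期': ['2023-12-01'], '推广费余额': [2930.73]})
m = mysql.MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)
m.df_to_mysql(
    df=df,
    db_name='test',
    table_name='增量更新测试',
    json_path=None,         # new in 1.3.3: forwarded to convert_dtypes as `path`
    system_name='company',  # selects mysql_types_company.json via mysql_types.DataTypes
)
```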
```diff
@@ -115,44 +115,42 @@ class MysqlUpload:
         connection = pymysql.connect(**self.config)  # 重新连接数据库
         with connection.cursor() as cursor:
             # 1. 查询表, 不存在则创建一个空表
-            sql =
-            cursor.execute(sql)
+            sql = "SHOW TABLES LIKE %s;"  # 有特殊字符不需转义
+            cursor.execute(sql, (table_name))
             if not cursor.fetchone():
-                sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY)"
+                sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
                 cursor.execute(sql)
                 print(f'创建 mysql 表: {table_name}')
 
             # 2. 列数据类型转换,将 df 数据类型转换为 mysql 的数据类型
-            dtypes, cl, db_n, tb_n = self.convert_dtypes(df=df, db_name=db_name, table_name=table_name, system_name=system_name)
+            dtypes, cl, db_n, tb_n = self.convert_dtypes(df=df, db_name=db_name, table_name=table_name, path=json_path, system_name=system_name)
 
             # 有特殊字符不需转义
-            sql =
-            cursor.execute(sql)
+            sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
+            cursor.execute(sql, (db_name, table_name))
             col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
             cols = df.columns.tolist()
             col_not_exist = [col for col in cols if col not in col_exist]
             # 检查列,不存在则新建列
             if col_not_exist:  # 数据表中不存在的列
                 for col in col_not_exist:
-
-
-
-
-
-
-
-
-
-
-
-
-                    except Exception as e:
-                        print(f'{self.filename}: {e}')
+                    # 创建列,需转义
+                    sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
+                    cursor.execute(sql)
+                    print(f"添加列: {col}({dtypes[col]})")  # 添加列并指定数据类型
+
+                    # 创建索引
+                    if col == '日期':
+                        sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
+                        cursor.execute(sql, (col))
+                        result = cursor.fetchone()  # 检查索引是否存在
+                        if not result:
+                            cursor.execute(f"CREATE INDEX index_name ON `{table_name}`(`{col}`)")
             connection.commit()  # 提交事务
 
             if df_sql:
                 now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
-                print(f'{now}
+                print(f'{now}正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
                 engine = create_engine(
                     f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}")  # 创建数据库引擎
                 df.to_sql(
```
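The rewritten hunk moves value interpolation into pymysql's placeholder binding, while identifiers (table and column names) stay backtick-quoted f-strings, since drivers cannot bind identifiers. A minimal self-contained sketch of the same pattern (connection details are placeholders and a reachable MySQL server is assumed):

```python
import pymysql

# Placeholders; a reachable MySQL server is assumed.
connection = pymysql.connect(host='127.0.0.1', port=3306, user='user', password='pass',
                             database='test', cursorclass=pymysql.cursors.DictCursor)
table_name = '增量更新测试'
with connection.cursor() as cursor:
    # Values go through the driver, which escapes special characters.
    cursor.execute("SHOW TABLES LIKE %s;", (table_name,))
    if not cursor.fetchone():
        # Identifiers cannot be bound as parameters, hence the backtick-quoted f-string.
        cursor.execute(f"CREATE TABLE IF NOT EXISTS `{table_name}` "
                       f"(id INT AUTO_INCREMENT PRIMARY KEY);")
connection.commit()
connection.close()
```

Note that the diff passes `(table_name)`, which is just a parenthesised string rather than a one-element tuple; pymysql accepts a scalar argument, though `(table_name,)` is the conventional spelling.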
```diff
@@ -195,20 +193,21 @@ class MysqlUpload:
                 try:
                     cols = ', '.join(f"`{item}`" for item in data.keys())  # 列名需要转义
                     # data.update({item: f"{data[item]}" for item in data.keys()})  # 全部值转字符, 不是必须的
-                    values = ', '.join([f"
+                    values = ', '.join([f'"{item}"' for item in data.values()])  # 值要加引号
                     condition = []
                     for k, v in data.items():
-                        condition += [f
+                        condition += [f'`{k}` = "{v}"']
                     condition = ' AND '.join(condition)  # 构建查询条件
                     # print(condition)
 
                     if drop_dup:  # 查重插入
-                        sql =
+                        sql = "SELECT %s FROM %s WHERE %s" % (cols, table_name, condition)
                         # sql = f"SELECT {cols} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
+                        # print(sql)
                         cursor.execute(sql)
                         result = cursor.fetchall()  # 获取查询结果, 有结果返回 list 表示数据已存在(不重复插入),没有则返回空 tuple
                         if not result:  # 数据不存在则插入
-                            sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (
+                            sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
                             cursor.execute(sql)
                         # else:
                         #     print(f'重复数据不插入: {condition[:50]}...')
```
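The restored lines assemble three strings from a row dict: the escaped column list, the double-quoted value list, and the WHERE condition. A pure-Python sketch with sample data (no database needed):

```python
# Pure-Python sketch of the string assembly in the dedup-insert branch (sample data only).
data = {'日期': '2023-12-01', '推广费余额': '2930.73'}
table_name = '增量更新测试'

cols = ', '.join(f"`{item}`" for item in data.keys())                  # backtick-escape column names
values = ', '.join([f'"{item}"' for item in data.values()])            # double-quote each value
condition = ' AND '.join([f'`{k}` = "{v}"' for k, v in data.items()])

print("SELECT %s FROM %s WHERE %s" % (cols, table_name, condition))
print(f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values))
```

Both statements are plain %-substitution on already-rendered strings, not driver-side parameter binding, so value quoting still happens in Python; that is worth keeping in mind when the data can contain double quotes.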
```diff
@@ -224,7 +223,7 @@ class MysqlUpload:
                     unique_keys = ', '.join(f"`{item}`" for item in update_col)  # 列名需要转义
                     condition = []
                     for up_col in icm_update:
-                        condition += [f
+                        condition += [f'`{up_col}` = "{data[up_col]}"']
                     condition = ' AND '.join(condition)  # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
                     sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
                     # print(sql)
```
```diff
@@ -251,7 +250,7 @@ class MysqlUpload:
                         not_change_col += [item for item in update_col if item != col]
                         # change_values 是 df 传进来且和数据库对比后,发生了变化的数据,值示例: [`品销宝余额` = '9999.0', `短信剩余` = '888']
                         if change_values:  # change_values 有数据返回,表示值需要更新
-                            not_change_values = [f
+                            not_change_values = [f'`{col}` = "{str(data[col])}"' for col in not_change_col]
                             not_change_values = ' AND '.join(not_change_values)  # 示例: `短信剩余` = '888' AND `test1` = '93'
                             # print(change_values, not_change_values)
                             condition += f' AND {not_change_values}'  # 重新构建完整的查询条件,将未发生变化的列加进查询条件
```
```diff
@@ -283,7 +282,7 @@ class MysqlUpload:
                     # # cursor.execute(sql)
 
                     else:
-                        sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (
+                        sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
                         cursor.execute(sql)
                 except Exception as e:
                     # print(data)
```
```diff
@@ -293,7 +292,7 @@ class MysqlUpload:
         connection.commit()  # 提交事务
         connection.close()
 
-    def convert_dtypes(self, df, db_name, table_name, system_name=None):
+    def convert_dtypes(self, df, db_name, table_name, path=None, system_name=None):
         """
         根据本地 json 转换 df 的类型为 mysql 专有的数据类型
         可能不存在本地 json 文件 (函数按指定规则转换并更新 json)
```
```diff
@@ -301,9 +300,7 @@ class MysqlUpload:
         """
         cols = df.columns.tolist()
         # path = set_support.SetSupport(dirname='support').dirname
-        d = mysql_types.DataTypes()
-        if system_name:
-            d.system_name = system_name
+        d = mysql_types.DataTypes(path=path, system_name=system_name)
         # 从本地文件中读取 dtype 信息
         dtypes, cl, db_n, tb_n = d.load_dtypes(cl='mysql', db_name=db_name, table_name=table_name)
         # 可能会因为没有 json 文件, 返回 None
```
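Taken together, 1.3.3 threads one pair of settings through the stack: `df_to_mysql`'s `json_path`/`system_name` flow into `convert_dtypes` as `path`/`system_name` and on into `mysql_types.DataTypes`, which owns the defaults. A condensed, runnable sketch of that threading, with bodies reduced to the JSON-path computation:

```python
# Condensed sketch of the 1.3.3 parameter threading; signatures from this diff, bodies elided.
def data_types_init(path=None, system_name=None):
    # mysql_types.DataTypes.__init__ applies the fallbacks (see the mysql_types.py hunks above).
    path = path or '<support dir>'
    system_name = system_name or 'home_lx'
    return f"{path}/mysql_types_{system_name}.json"

def convert_dtypes(path=None, system_name=None):
    return data_types_init(path=path, system_name=system_name)

def df_to_mysql(json_path=None, system_name=None):
    return convert_dtypes(path=json_path, system_name=system_name)

print(df_to_mysql())                                         # <support dir>/mysql_types_home_lx.json
print(df_to_mysql(json_path='/tmp', system_name='company'))  # /tmp/mysql_types_company.json
```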