mdbq 3.2.11__py3-none-any.whl → 3.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1324,7 +1324,91 @@ def cut_as_year_month(as_month=False):
1324
1324
  df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
1325
1325
 
1326
1326
 
1327
def _format_file_size(size_bytes):
    """ Render a byte count as a two-decimal figure in GB, MB or KB """
    for unit_bytes, unit in ((1024 ** 3, 'GB'), (1024 ** 2, 'MB')):
        # '>=' so that exactly one unit is shown as "1.00 MB", not "1024.00 KB"
        if size_bytes >= unit_bytes:
            return f'{size_bytes / unit_bytes:.2f} {unit}'
    return f'{size_bytes / 1024:.2f} KB'


def doc_to_sql(write_data=False, read_data=False):
    """
    Store local .pdf / .pptx files in MySQL as blobs and/or export one back to disk.

    write_data: when true, walk the hard-coded source folder and upload every
                .pdf / .pptx file through MysqlUpload.doc_to_sql
    read_data: when true, call MysqlUpload.read_doc_data for the name held in
               the local ``filename`` variable
    Returns None; does nothing when both flags are false or the source folder
    does not exist. Connection settings (username, password, host, port) are
    read from names defined outside this function.
    """
    if not write_data and not read_data:
        return
    path = '/Users/xigua/数据中心/微信pdf文件/2024-10'

    if not os.path.isdir(path):
        print(f'不存在的文件夹: {path}')
        return
    m_engine = mysql.MysqlUpload(
        username=username,
        password=password,
        host=host,
        port=port,
        charset='utf8mb4'
    )
    if write_data:
        # Column types are the same for every file, so build the map once
        set_typ = {
            '日期': 'date',
            '数据来源': 'varchar(100)',
            '文件名称': 'varchar(255)',
            '文件大小': 'varchar(20)',
            '修改时间': 'timestamp',
            '数据主体': 'longblob',
            '扩展名': 'varchar(50)',
            '更新时间': 'timestamp',
        }
        skip_markers = ('~$', '.DS', '.localized', 'baidu')
        for root, _dirs, files in os.walk(path, topdown=False):
            for name in files:
                if any(marker in name for marker in skip_markers):
                    continue
                if not name.endswith(('.pdf', '.pptx')):
                    continue
                full_path = os.path.join(root, name)
                # One stat call supplies both the size and the modification time
                file_stat = os.stat(full_path)
                mod_time_formatted = time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.localtime(file_stat.st_mtime))

                # Read the whole document as raw bytes for the blob column
                with open(full_path, 'rb') as file:
                    pdf_data = file.read()
                dict_data = {
                    '日期': datetime.datetime.today().strftime('%Y-%m-%d'),
                    '数据来源': '微信',
                    '文件名称': name,
                    '文件大小': _format_file_size(file_stat.st_size),
                    '修改时间': mod_time_formatted,
                    '数据主体': pdf_data,
                    '扩展名': os.path.splitext(name)[-1],
                    '更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                }
                m_engine.doc_to_sql(
                    db_name='pdf文件',
                    table_name='微信pdf文件',
                    remove_by_key=['文件名称'],
                    dict_data=dict_data,
                    set_typ=set_typ,
                    allow_not_null=False,
                    filename=name,
                    reset_id=True,
                )
    if read_data:
        # Placeholder: set this to the stored file name that should be exported
        filename = ''
        save_path = '/Users/xigua/Downloads'
        m_engine.read_doc_data(
            db_name='pdf文件',
            table_name='微信pdf文件',
            column='文件名称',
            filename=filename,
            save_path=save_path,
        )
1406
+
1327
1407
  if __name__ == '__main__':
1408
+ doc_to_sql(
1409
+ write_data=True,
1410
+ read_data=False,
1411
+ )
1328
1412
  # cut_as_year_month(as_month=False)
1329
1413
 
1330
1414
  # username = 'root'
@@ -1332,12 +1416,12 @@ if __name__ == '__main__':
1332
1416
  # host = ''
1333
1417
  # port = ''
1334
1418
 
1335
- # 上传 1 个文件到数据库
1336
- one_file_to_mysql(
1337
- file=r'/Users/xigua/Downloads/日期表.csv',
1338
- db_name='聚合数据test',
1339
- table_name='日期表',
1340
- )
1419
+ # # 上传 1 个文件到数据库
1420
+ # one_file_to_mysql(
1421
+ # file=r'/Users/xigua/Downloads/日期表.csv',
1422
+ # db_name='聚合数据test',
1423
+ # table_name='日期表',
1424
+ # )
1341
1425
 
1342
1426
 
1343
1427
  # col = 1
@@ -1895,6 +1895,7 @@ class MysqlDatasQuery:
1895
1895
  start_date, end_date = self.months_data(num=self.months)
1896
1896
  projection = {
1897
1897
  '日期': 1,
1898
+ '场景id': 1,
1898
1899
  '场景名字': 1,
1899
1900
  '花费': 1,
1900
1901
  '展现量': 1,
@@ -1918,10 +1919,10 @@ class MysqlDatasQuery:
1918
1919
  if len(df_tm) > 0:
1919
1920
  df_tm.rename(columns={'场景名字': '营销场景'}, inplace=True)
1920
1921
  df_tm = df_tm.groupby(
1921
- ['日期', '店铺名称', '营销场景', '花费'],
1922
+ ['日期', '店铺名称', '场景id', '营销场景', '花费', '展现量'],
1922
1923
  as_index=False).agg(
1923
1924
  **{
1924
- '展现量': ('展现量', np.max),
1925
+ # '展现量': ('展现量', np.max),
1925
1926
  '点击量': ('点击量', np.max),
1926
1927
  '加购量': ('总购物车数', np.max),
1927
1928
  '成交笔数': ('总成交笔数', np.max),
@@ -1942,10 +1943,10 @@ class MysqlDatasQuery:
1942
1943
  if len(df_tb) > 0:
1943
1944
  df_tb.rename(columns={'场景名字': '营销场景'}, inplace=True)
1944
1945
  df_tb = df_tb.groupby(
1945
- ['日期', '店铺名称', '营销场景', '花费'],
1946
+ ['日期', '店铺名称', '场景id', '营销场景', '花费', '展现量'],
1946
1947
  as_index=False).agg(
1947
1948
  **{
1948
- '展现量': ('展现量', np.max),
1949
+ # '展现量': ('展现量', np.max),
1949
1950
  '点击量': ('点击量', np.max),
1950
1951
  '加购量': ('总购物车数', np.max),
1951
1952
  '成交笔数': ('总成交笔数', np.max),
mdbq/mysql/mysql.py CHANGED
@@ -4,6 +4,7 @@ import platform
4
4
  import getpass
5
5
  import re
6
6
  import time
7
+ from fileinput import filename
7
8
  from functools import wraps
8
9
  import warnings
9
10
  import pymysql
@@ -117,13 +118,203 @@ class MysqlUpload:
117
118
  print(f'{func.__name__}, {e}') # 将异常信息返回
118
119
  with open(error_file, 'a') as f:
119
120
  now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
120
- f.write(f'\n{now}\n')
121
+ f.write(f'\n{now} \n')
121
122
  # f.write(f'报错的文件:\n{e.__traceback__.tb_frame.f_globals["__file__"]}\n') # 发生异常所在的文件
122
123
  traceback.print_exc(file=open(error_file, 'a')) # 返回完整的堆栈信息
123
124
  print(f'更多信息请查看日志文件: {error_file}')
124
125
 
125
126
  return wrapper
126
127
 
128
def cover_doc_dtypes(self, dict_data):
    """
    Clean the keys of a document record and choose a MySQL column type for each.

    dict_data: mapping of column name -> value (without the binary body)
    Returns (types, cleaned): ``types`` maps every cleaned key to a MySQL type
    and always contains '数据主体': 'longblob'; ``cleaned`` holds the original
    values under the cleaned keys. Returns None when dict_data is empty.
    """
    if not dict_data:
        print('mysql.py -> MysqlUpload -> cover_doc_dtypes -> 传入的字典不能为空')
        return
    col_types = {}
    cleaned = {}
    for k, v in dict_data.items():
        k = str(k).lower()
        # Replace EVERY illegal character. The flag used to be passed in the
        # ``count`` position of re.sub, which capped the replacement at two.
        k = re.sub(r'[()\-,,$&~^、 ()\"\'“”=·/。》《><!!`]', '_', k)
        k = k.replace(')', '')
        k = re.sub(r'_{2,}', '_', k)
        k = re.sub(r'_+$', '', k)

        # Rules decided by the column name alone come first; the value
        # inspection helpers are only consulted when the name is inconclusive.
        if re.search(r'编码|_?id|货号|款号|文件大小', k, re.IGNORECASE):
            col_types[k] = 'varchar(100)'  # codes / ids are stored as text
        elif k == '日期':
            col_types[k] = 'DATE'
        elif k == '更新时间':
            col_types[k] = 'TIMESTAMP'
        elif re.search(r'占比$|投产$|产出$|roi$|率$', k, re.IGNORECASE):
            col_types[k] = 'decimal(10,4)'  # ratio-like columns
        else:
            date_type = is_valid_date(v)
            if date_type == 1:  # date only
                col_types[k] = 'DATE'
            elif date_type == 2:  # date plus time
                col_types[k] = 'DATETIME'
            elif is_integer(v):
                col_types[k] = 'INT'
            else:
                count_int, count_float = count_decimal_places(v)
                if count_float <= 0:
                    col_types[k] = 'varchar(255)'
                elif count_float >= 6:
                    col_types[k] = 'decimal(14,6)'
                elif count_int + count_float > 10:
                    col_types[k] = 'decimal(14,4)'
                elif count_float >= 4:
                    col_types[k] = 'decimal(12,4)'
                else:
                    col_types[k] = 'decimal(10,2)'
        cleaned[k] = v
    col_types['数据主体'] = 'longblob'
    return col_types, cleaned
182
+
183
+ # @try_except
184
def doc_to_sql(self, db_name, table_name, dict_data, set_typ=None, remove_by_key=None, allow_not_null=False, filename=None, reset_id=False):
    """
    Insert one document (metadata plus binary body) as a row of a MySQL table.

    db_name: target database, created when missing
    table_name: target table, created when missing
    dict_data: row to insert; must contain the binary body under '数据主体'
    set_typ: optional {column: mysql type} overrides; these columns are created
             even when they are absent from dict_data
    remove_by_key: column names; rows whose values equal those of dict_data are
                   deleted before inserting. When not set the row is just added
    allow_not_null: when false, new columns are created with NOT NULL
    filename: only used in the progress message
    reset_id: when true, drop and re-create the auto-increment ``id`` column
    Returns None.
    """
    if '数据主体' not in dict_data.keys():
        print('dict_data 中"数据主体"键不能为空')
        return
    # Never share a mutable default between calls
    set_typ = dict(set_typ) if set_typ else {}

    # --- make sure the database exists ---
    connection = pymysql.connect(**self.config)
    try:
        with connection.cursor() as cursor:
            cursor.execute("SHOW DATABASES LIKE %s", (db_name,))
            if not cursor.fetchone():
                # Servers matched here do not support the 0900 collation
                legacy_server = '8.138.27' in str(self.host) or platform.system() == "Linux"
                if legacy_server:
                    # NOTE(review): this stores a collation name under 'charset' - confirm the driver accepts it
                    self.config.update({'charset': 'utf8mb4_unicode_ci'})
                if '192.168.1.100' in str(self.host):
                    sql = f"CREATE DATABASE `{db_name}`"
                elif legacy_server:
                    # Used to be overwritten by the 0900 statement below
                    sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
                else:
                    sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
                cursor.execute(sql)
                connection.commit()
                print(f"创建Database: {db_name}")
    finally:
        connection.close()

    self.config.update({'database': db_name})  # later connections select this database
    connection = pymysql.connect(**self.config)
    try:
        with connection.cursor() as cursor:
            # 1. create an empty table when it does not exist yet
            cursor.execute("SHOW TABLES LIKE %s;", (table_name,))
            if not cursor.fetchone():
                cursor.execute(f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);")
                print(f'创建 mysql 表: {table_name}')

            # 2. clean the metadata keys and infer their column types
            metadata = {k: v for k, v in dict_data.items() if k != '数据主体'}
            if metadata:
                dtypes, metadata = self.cover_doc_dtypes(metadata)
            else:
                # Body-only payload: nothing to clean, only the blob column is needed
                dtypes = {'数据主体': 'longblob'}
            # Caller-supplied types win; inferred types fill in the remaining columns
            column_types = dict(set_typ)
            for col, col_type in dtypes.items():
                column_types.setdefault(col, col_type)

            # 3. add the columns that the table does not have yet
            sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
            cursor.execute(sql, (db_name, table_name))
            # Rows are read by column name - presumably self.config selects a dict cursor
            known_cols = {item['COLUMN_NAME'].lower() for item in cursor.fetchall()}
            null_clause = '' if allow_not_null else ' NOT NULL'
            for col, col_type in column_types.items():
                if col.lower() in known_cols:  # MySQL column names are case-insensitive
                    continue
                cursor.execute(f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {col_type}{null_clause};")
                print(f"添加列: {col}({col_type})")
                known_cols.add(col.lower())
                if col == '日期':
                    print(f"设置为索引: {col}({col_type})")
                    cursor.execute(f"CREATE INDEX index_name ON `{table_name}`(`{col}`);")
            connection.commit()

            # 4. remove the rows that the new one replaces
            if remove_by_key:
                where = ' AND '.join(f'`{col}` = %s' for col in remove_by_key)
                cursor.execute(
                    f'DELETE FROM `{table_name}` WHERE {where};',
                    [str(dict_data[col]) for col in remove_by_key])

            # 5. insert; every value is bound as a parameter so quotes, backslashes
            #    or '%' inside a file name cannot break the statement
            now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
            insert_cols = list(metadata) + ['数据主体']
            cols = ', '.join(f"`{col}`" for col in insert_cols)
            placeholders = ', '.join(['%s'] * len(insert_cols))
            sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders})"
            # Metadata is sent as text, as before; the body stays binary
            cursor.execute(sql, [str(v) for v in metadata.values()] + [dict_data['数据主体']])

            if reset_id:
                # 6. rebuild the auto-increment column
                try:
                    cursor.execute(
                        "SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
                        "WHERE `TABLE_SCHEMA` = %s AND `TABLE_NAME` = %s AND `COLUMN_KEY` = 'PRI';",
                        (db_name, table_name))
                    primary_keys = cursor.fetchall()
                    if len(primary_keys) <= 1:
                        cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
                        if cursor.fetchone():
                            cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;")
                            cursor.execute(
                                f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
                            cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1")
                    else:
                        # With a composite primary key the id column is left untouched
                        print(f'{table_name} 当前表存在复合主键: {primary_keys}, 无法重置自增id')
                except Exception as e:
                    print(f'{e}')
                    connection.rollback()
            connection.commit()
    finally:
        connection.close()
316
+
317
+
127
318
  @try_except
128
319
  def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
129
320
  """
@@ -188,7 +379,7 @@ class MysqlUpload:
188
379
  print(f'创建 mysql 表: {table_name}')
189
380
 
190
381
  # 根据 dict_data 的值添加指定的数据类型
191
- dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'mediumtext',...}
382
+ dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
192
383
  if set_typ:
193
384
  # 更新自定义的列数据类型
194
385
  for k, v in dtypes.items():
@@ -574,9 +765,9 @@ class MysqlUpload:
574
765
  connection.commit() # 提交事务
575
766
 
576
767
  if df_sql:
577
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
768
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
578
769
  print(
579
- f'{now}正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
770
+ f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
580
771
  engine = create_engine(
581
772
  f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
582
773
  # df.to_csv('/Users/xigua/Downloads/mysql.csv', index=False, header=True, encoding='utf-8_sig')
@@ -784,7 +975,51 @@ class MysqlUpload:
784
975
  connection.commit() # 提交事务
785
976
  connection.close()
786
977
 
787
- # @try_except
978
+ @try_except
979
def read_doc_data(self, table_name, db_name='pdf文件', column='文件名', filename=None, save_path='/Users/xigua/Downloads'):
    """
    Export a stored document from MySQL to a local file.

    table_name: table holding the documents
    db_name: database holding the table
    column: column that is compared with ``filename``
    filename: stored name to look up; also the name of the written file
    save_path: folder the file is written to
    Returns None. Prints a message and stops when ``filename`` is empty or the
    database / table does not exist. Every matching row is written to the same
    target path, so the last one wins.
    """
    if not filename:
        print('未指定文件名: filename')
        return

    # Check the database first, on a connection that is always closed again
    connection = pymysql.connect(**self.config)
    try:
        with connection.cursor() as cursor:
            cursor.execute("SHOW DATABASES LIKE %s", (db_name,))
            if not cursor.fetchone():
                print(f"Database {db_name} 数据库不存在")
                return
    finally:
        connection.close()

    self.config.update({'database': db_name})
    connection = pymysql.connect(**self.config)
    try:
        with connection.cursor() as cursor:
            # 1. the table must exist
            cursor.execute("SHOW TABLES LIKE %s;", (table_name,))
            if not cursor.fetchone():
                print(f'{table_name} -> 数据表不存在')
                return

            # 2. fetch the body; the name is bound as a parameter so quotes in it are harmless
            sql = f"SELECT `{column}`, `数据主体` FROM `{table_name}` WHERE `{column}` = %s"
            cursor.execute(sql, (filename,))
            target = os.path.join(save_path, filename)
            for row in cursor.fetchall():
                # Rows are read by column name - presumably self.config selects a dict cursor
                with open(target, 'wb') as f:
                    f.write(row['数据主体'])
                now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                print(f'{now} 写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {target}')
    finally:
        connection.close()
1021
+
1022
+
788
1023
  def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
789
1024
  """ 读取指定数据表,可指定日期范围,返回结果: df """
790
1025
  start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
@@ -800,8 +1035,8 @@ class MysqlUpload:
800
1035
  print(f"Database {db_name} 数据库不存在")
801
1036
  return df
802
1037
  else:
803
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
804
- print(f'{now}mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
1038
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1039
+ print(f'{now} mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
805
1040
  except:
806
1041
  return df
807
1042
  finally:
@@ -828,11 +1063,11 @@ class MysqlUpload:
828
1063
  if len(df) == 0:
829
1064
  print(f'database: {db_name}, table: {table_name} 查询的数据为空')
830
1065
  else:
831
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
1066
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
832
1067
  cost_time = int(time.time() - before_time)
833
1068
  if cost_time < 1:
834
1069
  cost_time = round(time.time() - before_time, 2)
835
- print(f'{now}mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
1070
+ print(f'{now} mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
836
1071
  return df
837
1072
 
838
1073
  def upload_pandas(self, update_path, db_name, days=None):
@@ -860,8 +1095,8 @@ class MysqlUpload:
860
1095
  if name.endswith('.csv') and 'baidu' not in name:
861
1096
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
862
1097
  # if '日期' not in df.columns.tolist():
863
- # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
864
- # print(f'{now}{root_file} 缺少日期列, 不支持上传 mysql')
1098
+ # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1099
+ # print(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
865
1100
  # continue
866
1101
  if '日期' in df.columns.tolist():
867
1102
  df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
@@ -873,8 +1108,8 @@ class MysqlUpload:
873
1108
  if f_path.endswith('.csv') and 'baidu' not in f_path:
874
1109
  df = pd.read_csv(f_path, encoding='utf-8_sig', header=0, na_filter=False)
875
1110
  # if '日期' not in df.columns.tolist():
876
- # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
877
- # print(f'{now}{root_file} 缺少日期列, 不支持上传 mysql')
1111
+ # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1112
+ # print(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
878
1113
  # continue
879
1114
  if '日期' not in df.columns.tolist():
880
1115
  df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
@@ -925,7 +1160,7 @@ class OptimizeDatas:
925
1160
  print(f'{func.__name__}, {e}') # 将异常信息返回
926
1161
  with open(error_file, 'a') as f:
927
1162
  now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
928
- f.write(f'\n{now}\n')
1163
+ f.write(f'\n{now} \n')
929
1164
  # f.write(f'报错的文件:\n{e.__traceback__.tb_frame.f_globals["__file__"]}\n') # 发生异常所在的文件
930
1165
  traceback.print_exc(file=open(error_file, 'a')) # 返回完整的堆栈信息
931
1166
  print(f'更多信息请查看日志文件: {error_file}')
@@ -938,8 +1173,8 @@ class OptimizeDatas:
938
1173
  需要设置 self.db_name_lists
939
1174
  """
940
1175
  if not self.db_name_lists:
941
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
942
- print(f'{now}尚未设置参数: self.db_name_lists')
1176
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1177
+ print(f'{now} 尚未设置参数: self.db_name_lists')
943
1178
  return
944
1179
  for db_name in self.db_name_lists:
945
1180
  self.db_name = db_name
@@ -948,13 +1183,13 @@ class OptimizeDatas:
948
1183
  def optimize(self, except_key=['更新时间']):
949
1184
  """ 更新一个数据库 移除冗余数据 """
950
1185
  if not self.db_name:
951
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
952
- print(f'{now}尚未设置参数: self.db_name')
1186
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1187
+ print(f'{now} 尚未设置参数: self.db_name')
953
1188
  return
954
1189
  tables = self.table_list(db_name=self.db_name)
955
1190
  if not tables:
956
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
957
- print(f'{now}{self.db_name} -> 数据表不存在')
1191
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1192
+ print(f'{now} {self.db_name} -> 数据表不存在')
958
1193
  return
959
1194
 
960
1195
  # 日期初始化
@@ -971,8 +1206,8 @@ class OptimizeDatas:
971
1206
  start_date_before = self.start_date
972
1207
  end_date_before = self.end_date
973
1208
 
974
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
975
- print(f'{now}mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
1209
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1210
+ print(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
976
1211
  for table_dict in tables:
977
1212
  for key, table_name in table_dict.items():
978
1213
  # if '店铺指标' not in table_name:
@@ -985,8 +1220,8 @@ class OptimizeDatas:
985
1220
  cursor.execute(sql)
986
1221
  result = cursor.fetchone()
987
1222
  if not result:
988
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
989
- print(f'{now}数据表: {table_name}, 数据长度为 0')
1223
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1224
+ print(f'{now} 数据表: {table_name}, 数据长度为 0')
990
1225
  continue # 检查数据表是否为空
991
1226
 
992
1227
  cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`") # 查询数据表的列信息
@@ -1042,8 +1277,8 @@ class OptimizeDatas:
1042
1277
  print(f'{e}')
1043
1278
  self.connection.rollback()
1044
1279
  self.connection.close()
1045
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
1046
- print(f'{now}mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
1280
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1281
+ print(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
1047
1282
 
1048
1283
  def delete_duplicate(self, table_name, date, except_key=['更新时间']):
1049
1284
  datas = self.table_datas(db_name=self.db_name, table_name=str(table_name), date=date)
@@ -1076,8 +1311,8 @@ class OptimizeDatas:
1076
1311
  # 移除冗余数据
1077
1312
  sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
1078
1313
  cursor.execute(sql, duplicate_id)
1079
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
1080
- print(f"{now}{table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
1314
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1315
+ print(f"{now} {table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
1081
1316
  self.connection.commit() # 提交事务
1082
1317
  except Exception as e:
1083
1318
  print(f'{self.db_name}/{table_name}, {e}')
@@ -1114,8 +1349,8 @@ class OptimizeDatas:
1114
1349
  # 移除冗余数据
1115
1350
  sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
1116
1351
  cursor.execute(sql, duplicate_id)
1117
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
1118
- print(f"{now}{table_name} -> before: {len(datas)}, "
1352
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1353
+ print(f"{now} {table_name} -> before: {len(datas)}, "
1119
1354
  f"remove: {cursor.rowcount}")
1120
1355
  self.connection.commit() # 提交事务
1121
1356
  except Exception as e:
@@ -1139,8 +1374,8 @@ class OptimizeDatas:
1139
1374
  cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
1140
1375
  database_exists = cursor.fetchone()
1141
1376
  if not database_exists:
1142
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
1143
- print(f'{now}{db_name}: 数据表不存在!')
1377
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1378
+ print(f'{now} {db_name}: 数据表不存在!')
1144
1379
  return
1145
1380
  except Exception as e:
1146
1381
  print(f'002 {e}')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 3.2.11
3
+ Version: 3.2.13
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,11 +1,11 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
2
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/aggregation.py,sha256=ifactmmJYkm0UUgQG9rn6ledHnc1eFXXPdDywdB-690,76622
4
+ mdbq/aggregation/aggregation.py,sha256=-yzApnlqSN2L0E1YMu5ml-W827qpKQvWPCOI7jj2kzY,80264
5
5
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
6
6
  mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
7
7
  mdbq/aggregation/optimize_data.py,sha256=RXIv7cACCgYyehAxMjUYi_S7rVyjIwXKWMaM3nduGtA,3068
8
- mdbq/aggregation/query_data.py,sha256=lXJMlyHM9cSVD7rgf4dHR5NZwMXPt3OjM1Q91DYwbZc,148259
8
+ mdbq/aggregation/query_data.py,sha256=2--y1VNYhL7lCeVA9WjIHiz3K_2JYm9agFqWd5jaeIc,148341
9
9
  mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
10
10
  mdbq/aggregation/query_data_bak20241124.py,sha256=oY95ZK3qt3Wx9pdZKZ5cvDh45Yi5yGj1kl8G6riumHA,144513
11
11
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
@@ -28,7 +28,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
28
28
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
29
29
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
30
30
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
31
- mdbq/mysql/mysql.py,sha256=-mM51DUH2BWjRSF1ySPFGxMq1fIi5vI9UwAH5X1-nQ4,67760
31
+ mdbq/mysql/mysql.py,sha256=ZG6BMfoXg6YGnHqv7GfwPwd7RXjoetCAFqPnbdHWqOM,79507
32
32
  mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
33
33
  mdbq/mysql/s_query.py,sha256=MbIprZ4yJDAZ9AahZPzl7hqS695Vs0P-AJNwAtA_EEc,9287
34
34
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
@@ -46,7 +46,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
46
46
  mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
47
47
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
48
48
  mdbq/spider/aikucun.py,sha256=nIKKZOZbemKqcrikcrMmtksLgJjjzeU0I99teBgU1jE,22439
49
- mdbq-3.2.11.dist-info/METADATA,sha256=dRsOoo5Ocv2miwmeKzk3OMkmgBfN0LKuQ7fnj6YqtJQ,244
50
- mdbq-3.2.11.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
51
- mdbq-3.2.11.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
52
- mdbq-3.2.11.dist-info/RECORD,,
49
+ mdbq-3.2.13.dist-info/METADATA,sha256=dscbnTOvut2B3ZyaE7w2p8TUt1vEG1JwxjBxNoR0NFQ,244
50
+ mdbq-3.2.13.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
51
+ mdbq-3.2.13.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
52
+ mdbq-3.2.13.dist-info/RECORD,,
File without changes