mdbq 0.4.5__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {mdbq-0.4.5 → mdbq-0.4.6}/PKG-INFO +1 -1
  2. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/aggregation/aggregation.py +2 -2
  3. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/aggregation/query_data.py +1 -1
  4. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/dataframe/converter.py +45 -0
  5. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mysql/mysql.py +109 -3
  6. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq.egg-info/PKG-INFO +1 -1
  7. {mdbq-0.4.5 → mdbq-0.4.6}/setup.py +1 -1
  8. {mdbq-0.4.5 → mdbq-0.4.6}/README.txt +0 -0
  9. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/__init__.py +0 -0
  10. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/__version__.py +0 -0
  11. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/aggregation/__init__.py +0 -0
  12. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/aggregation/optimize_data.py +0 -0
  13. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/bdup/__init__.py +0 -0
  14. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/bdup/bdup.py +0 -0
  15. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/clean/__init__.py +0 -0
  16. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/clean/data_clean.py +0 -0
  17. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/company/__init__.py +0 -0
  18. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/company/copysh.py +0 -0
  19. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/config/__init__.py +0 -0
  20. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/config/get_myconf.py +0 -0
  21. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/config/products.py +0 -0
  22. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/config/set_support.py +0 -0
  23. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/config/update_conf.py +0 -0
  24. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/dataframe/__init__.py +0 -0
  25. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/log/__init__.py +0 -0
  26. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/log/mylogger.py +0 -0
  27. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mongo/__init__.py +0 -0
  28. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mongo/mongo.py +0 -0
  29. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mysql/__init__.py +0 -0
  30. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mysql/data_types.py +0 -0
  31. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mysql/s_query.py +0 -0
  32. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/mysql/year_month_day.py +0 -0
  33. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/other/__init__.py +0 -0
  34. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/other/porxy.py +0 -0
  35. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/other/pov_city.py +0 -0
  36. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/other/ua_sj.py +0 -0
  37. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/pbix/__init__.py +0 -0
  38. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/pbix/pbix_refresh.py +0 -0
  39. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/pbix/refresh_all.py +0 -0
  40. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq/spider/__init__.py +0 -0
  41. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq.egg-info/SOURCES.txt +0 -0
  42. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq.egg-info/dependency_links.txt +0 -0
  43. {mdbq-0.4.5 → mdbq-0.4.6}/mdbq.egg-info/top_level.txt +0 -0
  44. {mdbq-0.4.5 → mdbq-0.4.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 0.4.5
3
+ Version: 0.4.6
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1035,5 +1035,5 @@ if __name__ == '__main__':
1035
1035
  # username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
1036
1036
  # print(username, password, host, port)
1037
1037
 
1038
- main()
1039
- # upload()
1038
+ # main()
1039
+ upload()
@@ -525,5 +525,5 @@ def data_aggregation(service_databases=[{}]):
525
525
 
526
526
 
527
527
  if __name__ == '__main__':
528
- data_aggregation(service_databases=[{'home_lx': 'mysql'}])
528
+ data_aggregation(service_databases=[{'company': 'mysql'}])
529
529
  # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
@@ -9,6 +9,51 @@ class DataFrameConverter(object):
9
9
  self.df = df
10
10
 
11
11
  def convert_df_cols(self, df=pd.DataFrame({})):
12
+ """
13
+ 清理 dataframe 非法值
14
+ 对数据类型进行转换(尝试将 object 类型转为 int 或 float)
15
+ """
16
+ if len(df) == 0:
17
+ df = self.df
18
+ if len(df) == 0:
19
+ return
20
+ # dtypes = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
21
+ df.replace([np.inf, -np.inf], 0, inplace=True) # 清理一些非法值
22
+ df.replace(to_replace=['\\N', '-', '--', '', 'nan'], value=0, regex=False, inplace=True) # 替换掉特殊字符
23
+ df.replace(to_replace=[','], value='', regex=True, inplace=True)
24
+ df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
25
+ df.replace(to_replace=['"'], value='', regex=True, inplace=True)
26
+ cols = df.columns.tolist()
27
+
28
+ for col in cols:
29
+ # df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
30
+ # 百分比在某些数据库中不兼容, 转换百分比为小数
31
+ df[col] = df[col].apply(lambda x: float(float((str(x).rstrip("%"))) / 100) if str(x).endswith('%') and '~' not in str(x) else x)
32
+ # 尝试转换合适的数据类型
33
+ if df[col].dtype == 'object':
34
+ try:
35
+ # df[col] = df[col].astype(int) # 尝试转换 int
36
+ df[col] = df[col].apply(lambda x: int(x) if '_' not in str(x) else x)
37
+ except:
38
+ # df[col] = df[col].astype('float64', errors='ignore') # 尝试转换 float, 报错则忽略
39
+ try:
40
+ df[col] = df[col].apply(lambda x: float(x) if '_' not in str(x) else x)
41
+ except:
42
+ pass
43
+ if df[col].dtype == 'float': # 对于小数类型, 保留 6 位小数
44
+ df[col] = df[col].apply(lambda x: round(float(x), 6) if x != 0 else x)
45
+ # 清理列名, 在 mysql 里面列名不能含有某些特殊字符
46
+ if '日期' in col or '时间' in col:
47
+ try:
48
+ df[col] = df[col].apply(lambda x: pd.to_datetime(x))
49
+ except:
50
+ pass
51
+ new_col = col.lower()
52
+ df.rename(columns={col: new_col}, inplace=True)
53
+ df.fillna(0, inplace=True)
54
+ return df
55
+
56
+ def convert_df_cols_bak(self, df=pd.DataFrame({})):
12
57
  """
13
58
  清理 dataframe 列名的不合规字符(mysql)
14
59
  对数据类型进行转换(尝试将 object 类型转为 int 或 float)
@@ -9,6 +9,7 @@ import warnings
9
9
  import pymysql
10
10
  import numpy as np
11
11
  import pandas as pd
12
+ from more_itertools.more import iequals
12
13
  from sqlalchemy import create_engine
13
14
  import os
14
15
  import calendar
@@ -63,7 +64,7 @@ class MysqlUpload:
63
64
  return False
64
65
 
65
66
  # @try_except
66
- def df_to_mysql(self, df, tabel_name, db_name='远程数据源'):
67
+ def df_to_mysql_bak(self, df, tabel_name, db_name='远程数据源'):
67
68
  """
68
69
  将 df 写入数据库
69
70
  db_name: 数据库名称
@@ -182,6 +183,106 @@ class MysqlUpload:
182
183
  finally:
183
184
  connection.close()
184
185
 
186
+ def df_to_mysql(self, df, tabel_name, db_name='远程数据源', drop_duplicates=False):
187
+ """
188
+ 将 df 写入数据库
189
+ db_name: 数据库名称
190
+ tabel_name: 集合/表名称
191
+ drop_duplicates:仅限于聚合数据使用,其他情况不要设置
192
+ """
193
+ cv = converter.DataFrameConverter()
194
+ df = cv.convert_df_cols(df=df) # 清理 dataframe 非法值
195
+
196
+ connection = pymysql.connect(**self.config) # 连接数据库
197
+ with connection.cursor() as cursor:
198
+ cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
199
+ database_exists = cursor.fetchone()
200
+ if not database_exists:
201
+ # 如果数据库不存在,则新建
202
+ if '8.138.27' in str(self.host) or platform.system() == "Linux": # 阿里云 mysql 低版本不支持 0900
203
+ sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
204
+ self.config.update({'charset': 'utf8mb4_unicode_ci'})
205
+ if '192.168.1.100' in str(self.host):
206
+ sql = f"CREATE DATABASE `{db_name}`"
207
+ else:
208
+ sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
209
+ cursor.execute(sql)
210
+ connection.commit()
211
+ print(f"创建Database: {db_name}")
212
+
213
+ self.config.update({'database': db_name}) # 添加更新 config 字段
214
+ connection = pymysql.connect(**self.config) # 重新连接数据库
215
+ with connection.cursor() as cursor:
216
+ # 1. 查询表, 不存在则创建一个空表
217
+ sql = f"SHOW TABLES LIKE '{tabel_name}';" # 有特殊字符不需转义
218
+ cursor.execute(sql)
219
+ if not cursor.fetchone():
220
+ sql = f"CREATE TABLE IF NOT EXISTS `{tabel_name}` (id INT AUTO_INCREMENT PRIMARY KEY)"
221
+ cursor.execute(sql)
222
+ print(f'创建 mysql 表: {tabel_name}')
223
+
224
+ # 2. 列数据类型转换
225
+ dtypes = self.convert_dtypes(df=df, db_name=db_name, tabel_name=tabel_name)
226
+
227
+ # 有特殊字符不需转义
228
+ sql = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{db_name}' AND TABLE_NAME = '{tabel_name}';"
229
+ cursor.execute(sql)
230
+ col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
231
+ cols = df.columns.tolist()
232
+ col_not_exist = [col for col in cols if col not in col_exist]
233
+ # 检查列,不存在则新建列
234
+ if col_not_exist: # 数据表中不存在的列
235
+ for col in col_not_exist:
236
+ try:
237
+ # 创建列,需转义
238
+ sql = f"ALTER TABLE `{tabel_name}` ADD COLUMN `{col}` {dtypes[col]} DEFAULT NULL;"
239
+ cursor.execute(sql)
240
+ print(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
241
+
242
+ # 创建索引
243
+ if col == '日期':
244
+ cursor.execute(f"SHOW INDEXES FROM `{tabel_name}` WHERE `Column_name` = '{col}'")
245
+ result = cursor.fetchone() # 检查索引是否存在
246
+ if not result:
247
+ cursor.execute(f"CREATE INDEX index_name ON `{tabel_name}`(`{col}`)")
248
+ except:
249
+ pass
250
+ connection.commit() # 提交事务
251
+
252
+ # 4. 移除指定日期范围内的数据,仅限于聚合数据使用,其他情况不要设置
253
+ if drop_duplicates and '日期' in df.columns.tolist():
254
+ dates = df['日期'].values.tolist()
255
+ start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
256
+ end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
257
+ sql = f"DELETE FROM `{tabel_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
258
+ cursor.execute(sql)
259
+ connection.commit()
260
+
261
+ # 5. 更新插入数据
262
+ now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
263
+ print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{tabel_name}')
264
+ datas = df.to_dict(orient='records')
265
+ for data in datas:
266
+ try:
267
+ cols = ', '.join(f"`{item}`" for item in data.keys()) # 列名转义
268
+ # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
269
+ values = ', '.join([f"'{item}'" for item in data.values()]) # 值要加单引号 ''
270
+ condition = []
271
+ for k, v in data.items():
272
+ condition += [f"`{k}` = '{v}'"]
273
+ condition = ' AND '.join(condition) # 构建查询条件
274
+ # print(condition)
275
+
276
+ sql = f"SELECT {cols} FROM `{tabel_name}` WHERE {condition}"
277
+ cursor.execute(sql)
278
+ result = cursor.fetchall() # 获取查询结果, 如果有结果返回 list,没有则返回空元组 tuple
279
+ if not result: # 数据不存在则插入
280
+ sql = f"INSERT INTO `{tabel_name}` ({cols}) VALUES ({values});"
281
+ cursor.execute(sql)
282
+ except:
283
+ pass
284
+ connection.commit() # 提交事务
285
+
185
286
  def convert_dtypes(self, df, db_name, tabel_name):
186
287
  """
187
288
  根据本地已经存在的记录着 mysql dtypes 的 json 文件转换 df 的类型为 mysql 专有的数据类型
@@ -240,7 +341,7 @@ class MysqlUpload:
240
341
  return 'mediumtext'
241
342
  return 'INT'
242
343
  elif dtype == 'float64':
243
- return 'FLOAT'
344
+ return 'double' # mysql 中不要使用 float 类型,会影响计算结果
244
345
  elif dtype == 'object':
245
346
  return 'mediumtext'
246
347
  else:
@@ -689,8 +790,13 @@ def download_datas(tabel_name, save_path, start_date):
689
790
 
690
791
 
691
792
  if __name__ == '__main__':
692
- username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
793
+ username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
693
794
  print(username, password, host, port)
694
795
 
796
+ df = pd.read_csv('/Users/xigua/Downloads/余额查询.csv', encoding='utf-8_sig', header=0, na_filter=False)
797
+ # df = df.to_dict(orient='records')
798
+ m = MysqlUpload(username=username, password=password, host=host, port=port)
799
+ m.df_to_mysql_new(df=df, db_name='te2- %s t', tabel_name='测 -sdf @%试 表')
800
+
695
801
 
696
802
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 0.4.5
3
+ Version: 0.4.6
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -3,7 +3,7 @@
3
3
  from setuptools import setup, find_packages
4
4
 
5
5
  setup(name='mdbq',
6
- version='0.4.5',
6
+ version='0.4.6',
7
7
  author='xigua, ',
8
8
  author_email="2587125111@qq.com",
9
9
  url='https://pypi.org/project/mdbsql',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes