mdbq 1.3.8.tar.gz → 1.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {mdbq-1.3.8 → mdbq-1.4.0}/PKG-INFO +1 -1
  2. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/aggregation/aggregation.py +7 -2
  3. mdbq-1.4.0/mdbq/aggregation/optimize_data.py +61 -0
  4. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/company/copysh.py +36 -19
  5. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mysql/mysql.py +2 -2
  6. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq.egg-info/PKG-INFO +1 -1
  7. {mdbq-1.3.8 → mdbq-1.4.0}/setup.py +1 -1
  8. mdbq-1.3.8/mdbq/aggregation/optimize_data.py +0 -22
  9. {mdbq-1.3.8 → mdbq-1.4.0}/README.txt +0 -0
  10. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/__init__.py +0 -0
  11. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/__version__.py +0 -0
  12. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/aggregation/__init__.py +0 -0
  13. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/aggregation/df_types.py +0 -0
  14. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/aggregation/mysql_types.py +0 -0
  15. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/aggregation/query_data.py +0 -0
  16. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/bdup/__init__.py +0 -0
  17. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/bdup/bdup.py +0 -0
  18. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/clean/__init__.py +0 -0
  19. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/clean/data_clean.py +0 -0
  20. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/company/__init__.py +0 -0
  21. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/config/__init__.py +0 -0
  22. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/config/get_myconf.py +0 -0
  23. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/config/products.py +0 -0
  24. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/config/set_support.py +0 -0
  25. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/config/update_conf.py +0 -0
  26. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/dataframe/__init__.py +0 -0
  27. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/dataframe/converter.py +0 -0
  28. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/log/__init__.py +0 -0
  29. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/log/mylogger.py +0 -0
  30. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mongo/__init__.py +0 -0
  31. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mongo/mongo.py +0 -0
  32. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mysql/__init__.py +0 -0
  33. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mysql/s_query.py +0 -0
  34. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mysql/year_month_day.py +0 -0
  35. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/other/__init__.py +0 -0
  36. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/other/porxy.py +0 -0
  37. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/other/pov_city.py +0 -0
  38. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/other/ua_sj.py +0 -0
  39. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/pbix/__init__.py +0 -0
  40. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/pbix/pbix_refresh.py +0 -0
  41. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/pbix/refresh_all.py +0 -0
  42. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq/spider/__init__.py +0 -0
  43. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq.egg-info/SOURCES.txt +0 -0
  44. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq.egg-info/dependency_links.txt +0 -0
  45. {mdbq-1.3.8 → mdbq-1.4.0}/mdbq.egg-info/top_level.txt +0 -0
  46. {mdbq-1.3.8 → mdbq-1.4.0}/setup.cfg +0 -0
{mdbq-1.3.8 → mdbq-1.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 1.3.8
+Version: 1.4.0
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
{mdbq-1.3.8 → mdbq-1.4.0}/mdbq/aggregation/aggregation.py

@@ -549,6 +549,7 @@ class DatabaseUpdate:
                 '数据库名': db_name,
                 '集合名称': collection_name,
                 '数据主体': df,
+                '文件名': name,
             }
         )
 
@@ -585,6 +586,7 @@ class DatabaseUpdate:
                 '数据库名': db_name,
                 '集合名称': collection_name,
                 '数据主体': df,
+                '文件名': name,
             }
         )
         if is_move:

@@ -596,6 +598,7 @@ class DatabaseUpdate:
                 '数据库名': '聚合数据',
                 '集合名称': '日期表',
                 '数据主体': df,
+                '文件名': '日期表文件名',
             }
         )
@@ -641,7 +644,7 @@ class DatabaseUpdate:
             port=port,
         )
         for data in self.datas:
-            df, db_name, collection_name = data['数据主体'], data['数据库名'], data['集合名称']
+            df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
             df_to_json.get_df_types(
                 df=df,
                 db_name=db_name,

@@ -653,7 +656,9 @@ class DatabaseUpdate:
                 db_name=db_name,
                 table_name=collection_name,
                 df_sql=False,  # when True, upload the whole table with df.to_sql, without deduplication
-                drop_dup=True  # when True, check for duplicates before inserting; otherwise upload directly
+                drop_dup=False,  # when True, check for duplicates before inserting; otherwise upload directly (the check is slow)
+                filename=rt_filename,  # used to trace processing progress
+                system_name=system_name,  # used to trace processing progress
             )
             df_to_json.as_json_file()  # write a json file that includes the data's dtypes info
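Taken together, these aggregation.py hunks thread the source filename through the upload pipeline: every record appended to self.datas now carries a '文件名' key, and upload_df unpacks it and forwards it to df_to_mysql as filename (together with system_name) so processing progress can be traced back to a file. Below is a minimal runnable sketch of the new flow; FakeUploader and the sample values are hypothetical stand-ins for mdbq's MysqlUpload, and only the call shape (the dict keys and keyword arguments) comes from the diff:

# A minimal sketch of the 1.4.0 upload flow. FakeUploader and the sample
# values are hypothetical stand-ins; only the call shape matches the diff.
import pandas as pd

class FakeUploader:
    def df_to_mysql(self, df, table_name, db_name, df_sql=False,
                    drop_dup=False, filename=None, system_name=None):
        print(f'would upload {len(df)} rows to {db_name}.{table_name} '
              f'(file: {filename}, system: {system_name})')

datas = [{
    '数据库名': 'demo_db',                    # database name
    '集合名称': 'demo_table',                 # collection/table name
    '数据主体': pd.DataFrame({'a': [1, 2]}),  # the dataframe itself
    '文件名': 'demo.csv',                     # new in 1.4.0: traced through the upload
}]

uploader = FakeUploader()
for data in datas:
    df, db_name, collection_name, rt_filename = (
        data['数据主体'], data['数据库名'], data['集合名称'], data['文件名'])
    uploader.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
                         df_sql=False,
                         drop_dup=False,             # new default: skip the duplicate check
                         filename=rt_filename,       # progress tracking
                         system_name='demo_system')  # progress tracking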
mdbq-1.4.0/mdbq/aggregation/optimize_data.py (new file)

@@ -0,0 +1,61 @@
+# -*- coding: UTF-8 –*-
+from mdbq.mysql import mysql
+from mdbq.config import get_myconf
+"""
+Clean up all redundant data in the specified databases
+"""
+
+
+def op_data(db_name_lists, service_databases=None, days: int = 63):
+    """ the service_databases parameter is currently unused """
+    # for service_database in service_databases:
+    #     for service_name, database in service_database.items():
+    #         username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    #         s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+    #         s.db_name_lists = [
+    #             '聚合数据',
+    #         ]
+    #         s.days = days
+    #         s.optimize_list()
+
+    if socket.gethostname() == 'xigua_lx' or socket.gethostname() == 'xigua1' or socket.gethostname() == 'Mac2.local':
+        # mongodb
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='home_lx',
+            database='mongodb',
+        )
+        m = mongo.OptimizeDatas(username=username, password=password, host=host, port=port)
+        m.db_name_lists = db_name_lists
+        m.days = days
+        m.optimize_list()
+        if m.client:
+            m.client.close()
+            print(f'已关闭 mongodb 连接')
+
+        if socket.gethostname() == 'xigua_lx':
+            restart_mongodb()  # mongodb uses too much memory; restart the service to free it
+
+        # Mysql
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='home_lx',
+            database='mysql',
+        )
+        s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        s.db_name_lists = db_name_lists
+        s.days = days
+        s.optimize_list()
+
+    elif socket.gethostname() == 'company':
+        # Mysql
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='company',
+            database='mysql',
+        )
+        s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        s.db_name_lists = db_name_lists
+        s.days = days
+        s.optimize_list()
+
+
+if __name__ == '__main__':
+    op_data(service_databases=[{'home_lx': 'mysql'}], days=3650)
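Two things stand out in the new module as published: it calls socket.gethostname(), mongo.OptimizeDatas, and restart_mongodb() without importing socket or mdbq.mongo or defining restart_mongodb in this file, and the __main__ guard still uses the old calling convention, omitting the now-required positional db_name_lists (so running the file directly would raise a TypeError). A hedged sketch of calling the new signature, assuming those missing names are resolved elsewhere:

# Hypothetical caller for the new signature; assumes the module's missing
# imports (socket, mdbq.mongo) and restart_mongodb are resolved elsewhere.
from mdbq.aggregation import optimize_data

optimize_data.op_data(
    db_name_lists=['聚合数据'],  # now a required positional argument
    days=3650,                   # forwarded to OptimizeDatas.days
)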
{mdbq-1.3.8 → mdbq-1.4.0}/mdbq/company/copysh.py

@@ -293,27 +293,44 @@ def op_data(days: int =3650):
 
     # clean the databases, except for 聚合数据
     if socket.gethostname() == 'company':  # runs on the company desktop itself
-        # Mysql
-        username, password, host, port = get_myconf.select_config_values(
-            target_service='company',
-            database='mysql',
+        # # Mysql
+        # username, password, host, port = get_myconf.select_config_values(
+        #     target_service='company',
+        #     database='mysql',
+        # )
+        # s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        # s.db_name_lists = [
+        #     '京东数据2',
+        #     '推广数据2',
+        #     '市场数据2',
+        #     '生意参谋2',
+        #     '生意经2',
+        #     '属性设置2',
+        #     # '聚合数据',  # don't clean 聚合数据 here; aggregation hasn't started yet
+        # ]
+        # s.days = days
+        # s.optimize_list()
+
+        # clean every database except 聚合数据
+        optimize_data.op_data(
+            db_name_lists=[
+                '京东数据2',
+                '推广数据2',
+                '市场数据2',
+                '生意参谋2',
+                '生意经2',
+                '属性设置2',
+                # '聚合数据',  # don't clean 聚合数据 here; aggregation hasn't started yet
+            ],
+            days=3650,
         )
-        s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
-        s.db_name_lists = [
-            '京东数据2',
-            '推广数据2',
-            '市场数据2',
-            '生意参谋2',
-            '生意经2',
-            '属性设置2',
-            # '聚合数据',  # don't clean 聚合数据 here; aggregation hasn't started yet
-        ]
-        s.days = days
-        s.optimize_list()
 
-    # aggregate the data, then clean 聚合数据
-    query_data.data_aggregation(service_databases=[{'company': 'mysql'}], months=1, system_name='company')
+    # data aggregation
+    query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1, system_name='home_lx')
+    time.sleep(60)
 
+    # clean 聚合数据
+    optimize_data.op_data(db_name_lists=['聚合数据'], days=3650, )
 
 
 def main():
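The rewritten op_data in copysh.py delegates cleanup to the new optimize_data.op_data helper and splits the work into three steps: clean the raw databases, aggregate, then clean 聚合数据. Note that inside the 'company' host branch the aggregation call now targets {'home_lx': 'mysql'} rather than {'company': 'mysql'}. A compact paraphrase of the new flow, using only names that appear in the hunk above:

# Paraphrase of the 1.4.0 op_data flow on the 'company' host (names from the diff).
import time
from mdbq.aggregation import optimize_data, query_data

RAW_DBS = ['京东数据2', '推广数据2', '市场数据2', '生意参谋2', '生意经2', '属性设置2']

optimize_data.op_data(db_name_lists=RAW_DBS, days=3650)  # 1. clean the raw databases
query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}],
                            months=1, system_name='home_lx')  # 2. aggregate
time.sleep(60)  # 60-second pause, as in the diff
optimize_data.op_data(db_name_lists=['聚合数据'], days=3650)  # 3. clean the aggregates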
@@ -329,7 +346,7 @@ def main():
         dp = aggregation.DatabaseUpdate(path=d_path)
         dp.new_unzip(is_move=True)
         dp.cleaning(is_move=True)  # the company desktop needs to remove the files it downloaded itself
-        dp.upload_df(service_databases=[{'company': 'mysql'}])
+        dp.upload_df(service_databases=[{'company': 'mysql'}], system_name='company')
 
         # this step edits the .copysh_conf file, setting ch_record to false (update finished)
         w = update_conf.UpdateConf()
{mdbq-1.3.8 → mdbq-1.4.0}/mdbq/mysql/mysql.py

@@ -57,14 +57,14 @@ class MysqlUpload:
         }
         self.filename = None
 
-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=True, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=False, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
         """
         Write df to the database
         db_name: database name
         table_name: collection/table name
         df_sql: a temporary parameter; when True, upload the whole table with df.to_sql, without deduplication
         drop_duplicates: set True only when used for 聚合数据; do not set this parameter in other cases
-        drop_dup: when True, check for duplicates before inserting; otherwise upload directly
+        drop_dup: when True, check for duplicates before inserting; otherwise upload directly (slow for large datasets)
         filename: passing this parameter makes it easier to locate the file that caused an error
         icm_update: incremental update, used for 聚合数据; do not use for raw files; when set, drop_dup must be changed to False
             to use incremental updates: the columns passed via icm_update must be unique primary keys in the table, with values that never change and never repeat; otherwise data may be scrambled or overwritten
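The practical consequence of this hunk is that df_to_mysql's drop_dup default flips from True to False: callers that relied on the implicit duplicate check now get a direct upload unless they opt back in. A hypothetical call follows; the MysqlUpload constructor arguments are an assumption inferred from how OptimizeDatas is constructed elsewhere in this diff, not a confirmed API:

# Hypothetical opt-in to the old dedup behavior after the 1.4.0 default change.
import pandas as pd
from mdbq.mysql import mysql

df = pd.DataFrame({'日期': ['2024-01-01'], '销售额': [100]})
# Constructor arguments (username/password/host/port) are assumed here.
uploader = mysql.MysqlUpload(username='user', password='pw', host='127.0.0.1', port=3306)
uploader.df_to_mysql(
    df=df,
    db_name='远程数据源',
    table_name='demo',
    drop_dup=True,  # 1.3.8 default; must now be passed explicitly to keep the dedup check
)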
{mdbq-1.3.8 → mdbq-1.4.0}/mdbq.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 1.3.8
+Version: 1.4.0
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
{mdbq-1.3.8 → mdbq-1.4.0}/setup.py

@@ -3,7 +3,7 @@
 from setuptools import setup, find_packages
 
 setup(name='mdbq',
-      version='1.3.8',
+      version='1.4.0',
       author='xigua, ',
       author_email="2587125111@qq.com",
       url='https://pypi.org/project/mdbsql',
mdbq-1.3.8/mdbq/aggregation/optimize_data.py (deleted)

@@ -1,22 +0,0 @@
-# -*- coding: UTF-8 –*-
-from mdbq.mysql import mysql
-from mdbq.config import get_myconf
-"""
-Clean up all redundant data in the specified databases
-"""
-
-
-def op_data(service_databases, days: int = 63):
-    for service_database in service_databases:
-        for service_name, database in service_database.items():
-            username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
-            s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
-            s.db_name_lists = [
-                '聚合数据',
-            ]
-            s.days = days
-            s.optimize_list()
-
-
-if __name__ == '__main__':
-    op_data(service_databases=[{'home_lx': 'mysql'}], days=3650)
The remaining files (items 9–46 in the list above) are unchanged.