mdbq 1.3.8__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +7 -2
- mdbq/aggregation/optimize_data.py +49 -10
- mdbq/company/copysh.py +36 -19
- mdbq/mysql/mysql.py +2 -2
- {mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/METADATA +1 -1
- {mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/RECORD +8 -8
- {mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/WHEEL +0 -0
- {mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -549,6 +549,7 @@ class DatabaseUpdate:
                     '数据库名': db_name,
                     '集合名称': collection_name,
                     '数据主体': df,
+                    '文件名': name,
                 }
             )

@@ -585,6 +586,7 @@ class DatabaseUpdate:
                     '数据库名': db_name,
                     '集合名称': collection_name,
                     '数据主体': df,
+                    '文件名': name,
                 }
             )
         if is_move:
@@ -596,6 +598,7 @@ class DatabaseUpdate:
                     '数据库名': '聚合数据',
                     '集合名称': '日期表',
                     '数据主体': df,
+                    '文件名': '日期表文件名',
                 }
             )

@@ -641,7 +644,7 @@ class DatabaseUpdate:
             port=port,
         )
         for data in self.datas:
-            df, db_name, collection_name = data['数据主体'], data['数据库名'], data['集合名称']
+            df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
             df_to_json.get_df_types(
                 df=df,
                 db_name=db_name,
@@ -653,7 +656,9 @@ class DatabaseUpdate:
                 db_name=db_name,
                 table_name=collection_name,
                 df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-                drop_dup=
+                drop_dup=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+                filename=rt_filename,  # 用来追踪处理进度
+                system_name=system_name,  # 用来追踪处理进度
             )
             df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
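Taken together, these hunks thread the source filename through `self.datas` and into the MySQL upload call so processing progress can be traced back to the originating file. A minimal sketch of the new flow follows; it assumes `MysqlUpload` is constructed with the same `username`/`password`/`host`/`port` keywords used for `OptimizeDatas` elsewhere in this diff, and the DataFrame, filename, and system name are placeholder values, not taken from the package.

```python
import pandas as pd
from mdbq.config import get_myconf
from mdbq.mysql import mysql

# Placeholder credentials, resolved the same way the package does elsewhere.
username, password, host, port = get_myconf.select_config_values(
    target_service='company', database='mysql')
uploader = mysql.MysqlUpload(username=username, password=password, host=host, port=port)

# Each collected entry now carries a '文件名' key (new in 1.4.0).
datas = [
    {
        '数据库名': '聚合数据',
        '集合名称': '日期表',
        '数据主体': pd.DataFrame({'日期': ['2024-01-01']}),  # placeholder data
        '文件名': '日期表文件名',
    },
]

for data in datas:
    df, db_name, collection_name, rt_filename = (
        data['数据主体'], data['数据库名'], data['集合名称'], data['文件名'])
    uploader.df_to_mysql(
        df=df,
        db_name=db_name,
        table_name=collection_name,
        df_sql=False,            # True 时使用 df.to_sql 上传整个表, 不会排重
        drop_dup=False,          # True 时检查重复数据再插入, 较慢
        filename=rt_filename,    # 用来追踪处理进度
        system_name='company',   # 用来追踪处理进度 (placeholder value)
    )
```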
mdbq/aggregation/optimize_data.py
CHANGED
@@ -6,16 +6,55 @@ from mdbq.config import get_myconf
 """


-def op_data(service_databases, days: int = 63):
-
-
-
-
-
-
-
-
+def op_data(db_name_lists, service_databases=None, days: int = 63):
+    """ service_databases 这个参数暂时没有用 """
+    # for service_database in service_databases:
+    #     for service_name, database in service_database.items():
+    #         username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    #         s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+    #         s.db_name_lists = [
+    #             '聚合数据',
+    #         ]
+    #         s.days = days
+    #         s.optimize_list()
+
+    if socket.gethostname() == 'xigua_lx' or socket.gethostname() == 'xigua1' or socket.gethostname() == 'Mac2.local':
+        # mongodb
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='home_lx',
+            database='mongodb',
+        )
+        m = mongo.OptimizeDatas(username=username, password=password, host=host, port=port)
+        m.db_name_lists = db_name_lists
+        m.days = days
+        m.optimize_list()
+        if m.client:
+            m.client.close()
+            print(f'已关闭 mongodb 连接')
+
+        if socket.gethostname() == 'xigua_lx':
+            restart_mongodb()  # mongodb 太占内存了, 重启服务, 释放内存
+
+        # Mysql
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='home_lx',
+            database='mysql',
+        )
+        s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        s.db_name_lists = db_name_lists
+        s.days = days
+        s.optimize_list()
+
+    elif socket.gethostname() == 'company':
+        # Mysql
+        username, password, host, port = get_myconf.select_config_values(
+            target_service='company',
+            database='mysql',
+        )
+        s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        s.db_name_lists = db_name_lists
+        s.days = days
+        s.optimize_list()


 if __name__ == '__main__':
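With this rewrite, `op_data` takes the databases to optimize as an explicit `db_name_lists` argument and selects the MongoDB/MySQL targets from the hostname, while `service_databases` is kept only as an unused compatibility parameter. A hedged call sketch, with database names taken from this diff as examples:

```python
from mdbq.aggregation import optimize_data

# New 1.4.0 call style: name the databases to clean explicitly.
# service_databases is accepted but currently unused, per the docstring.
optimize_data.op_data(
    db_name_lists=['京东数据2', '推广数据2'],
    days=63,  # forwarded to the OptimizeDatas instances via m.days / s.days
)
```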
mdbq/company/copysh.py
CHANGED
@@ -293,27 +293,44 @@ def op_data(days: int =3650):

     # 清理数据库, 除了 聚合数据
     if socket.gethostname() == 'company':  # 公司台式机自身运行
-        # Mysql
-        username, password, host, port = get_myconf.select_config_values(
-
-
+        # # Mysql
+        # username, password, host, port = get_myconf.select_config_values(
+        #     target_service='company',
+        #     database='mysql',
+        # )
+        # s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        # s.db_name_lists = [
+        #     '京东数据2',
+        #     '推广数据2',
+        #     '市场数据2',
+        #     '生意参谋2',
+        #     '生意经2',
+        #     '属性设置2',
+        #     # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
+        # ]
+        # s.days = days
+        # s.optimize_list()
+
+        # 清理所有非聚合数据的库
+        optimize_data.op_data(
+            db_name_lists=[
+                '京东数据2',
+                '推广数据2',
+                '市场数据2',
+                '生意参谋2',
+                '生意经2',
+                '属性设置2',
+                # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
+            ],
+            days=3650,
         )
-        s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
-        s.db_name_lists = [
-            '京东数据2',
-            '推广数据2',
-            '市场数据2',
-            '生意参谋2',
-            '生意经2',
-            '属性设置2',
-            # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
-        ]
-        s.days = days
-        s.optimize_list()

-
-
+        # 数据聚合
+        query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1, system_name='home_lx')
+        time.sleep(60)

+        # 清理聚合数据
+        optimize_data.op_data(db_name_lists=['聚合数据'], days=3650, )


 def main():
@@ -329,7 +346,7 @@ def main():
     dp = aggregation.DatabaseUpdate(path=d_path)
     dp.new_unzip(is_move=True)
     dp.cleaning(is_move=True)  # 公司台式机需要移除自身下载的文件
-    dp.upload_df(service_databases=[{'company': 'mysql'}])
+    dp.upload_df(service_databases=[{'company': 'mysql'}], system_name='company')

     # 此操作用于修改 .copysh_conf 文件,将 ch_record 改为 false (更新完成)
     w = update_conf.UpdateConf()
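The net effect on the 'company' branch is that the inline `mysql.OptimizeDatas` block is replaced by calls into `optimize_data` and `query_data`. Below is a condensed restatement of the new sequence, assuming it runs on the 'company' host as in the guarded branch above; it only rearranges calls that appear in this diff:

```python
import time
from mdbq.aggregation import optimize_data, query_data

# 1) Clean all raw (non-aggregated) databases.
optimize_data.op_data(
    db_name_lists=['京东数据2', '推广数据2', '市场数据2', '生意参谋2', '生意经2', '属性设置2'],
    days=3650,
)

# 2) Aggregate, then pause briefly before the next cleanup pass.
query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1, system_name='home_lx')
time.sleep(60)

# 3) Clean the aggregated database last.
optimize_data.op_data(db_name_lists=['聚合数据'], days=3650)
```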
mdbq/mysql/mysql.py
CHANGED
@@ -57,14 +57,14 @@ class MysqlUpload:
         }
         self.filename = None

-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=False, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
         """
         将 df 写入数据库
         db_name: 数据库名称
         table_name: 集合/表名称
         df_sql: 这是一个临时参数, 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
         drop_duplicates: 值为 True 时(仅限于聚合数据使用),其他情况不要设置此参数
-        drop_dup: 值为 True
+        drop_dup: 值为 True 时检查重复数据再插入,反之直接上传,数据量大时会比较慢
         filename: 传这个参数是方便定位产生错误的文件
         icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_dup 改为 False
             使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
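For context, the expanded `df_to_mysql` signature exposes the upload modes the docstring describes. The sketch below restates them with illustrative values, reusing the hypothetical `uploader` and `df` from the aggregation.py example above; the table name and the column passed to `icm_update` are placeholders, not names from the package.

```python
# Reuses the hypothetical `uploader` and `df` from the earlier sketch.

# 1) Bulk upload via df.to_sql, no de-duplication (df_sql 是临时参数).
uploader.df_to_mysql(df=df, db_name='远程数据源', table_name='示例表', df_sql=True)

# 2) Check for duplicates before inserting (slower on large tables).
uploader.df_to_mysql(df=df, db_name='远程数据源', table_name='示例表', drop_dup=True)

# 3) Incremental update keyed on unique columns (聚合数据 only); drop_dup must stay False.
uploader.df_to_mysql(
    df=df, db_name='聚合数据', table_name='示例表',
    drop_dup=False, icm_update=['日期'],             # placeholder key column
    filename='示例文件.csv', system_name='company',  # 用来追踪处理进度
)
```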
{mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=
+mdbq/aggregation/aggregation.py,sha256=cEKhmlqvrChXlxyxEq5qNMAgL_3hp1xtt9heOxfu9Z4,58113
 mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
 mdbq/aggregation/mysql_types.py,sha256=jTQaCrDy9hj36GIImshXANyQNFAvVKJTybkzJSh-qJ8,10671
-mdbq/aggregation/optimize_data.py,sha256=
+mdbq/aggregation/optimize_data.py,sha256=uv-KHSGazEpbUyCu2G3kjrdq0h0QLZY7BhIkcbqQqek,2360
 mdbq/aggregation/query_data.py,sha256=qSCN3Y20Ken4cmwG63kMnvz9jg115PB9qvo-hF9Aq34,27504
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
 mdbq/clean/data_clean.py,sha256=TnG0t6Ted9miMoBdGanpTmj6AUa0YFSyEBvmgN3V7sw,87558
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
-mdbq/company/copysh.py,sha256=
+mdbq/company/copysh.py,sha256=NJWEnxOZhbnqbHPQfCxBNAcl-LLLEk4Gn9QP35XlA0c,17041
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/config/get_myconf.py,sha256=bp6bVARZVm3ANj1pmM9hLB8Ao539TUWeM9xxeSsBpzw,5994
 mdbq/config/products.py,sha256=9gqXJMsw8KKuD4Xs6krNgcF7AuWDvV7clI6wVi3QjcA,4260
@@ -24,7 +24,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=
+mdbq/mysql/mysql.py,sha256=kkkbFJHox0diPpDadwHicRQcDUxdH1h8MsMF1MNNBRI,44344
 mdbq/mysql/s_query.py,sha256=a33aYhW6gAnspIZfQ7l23ePln9-MD1f_ukypr5M0jd8,8018
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-1.
-mdbq-1.
-mdbq-1.
-mdbq-1.
+mdbq-1.4.0.dist-info/METADATA,sha256=2azUUrdXUdVs0FNaexwkZcRDo4_MnvwLYtX4MvSIGlU,245
+mdbq-1.4.0.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+mdbq-1.4.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-1.4.0.dist-info/RECORD,,
{mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/WHEEL
File without changes
{mdbq-1.3.8.dist-info → mdbq-1.4.0.dist-info}/top_level.txt
File without changes