mdbq 1.8.6__tar.gz → 1.8.8__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {mdbq-1.8.6 → mdbq-1.8.8}/PKG-INFO +1 -1
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/aggregation/aggregation.py +7 -3
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/aggregation/query_data.py +3 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/dataframe/converter.py +1 -1
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.8.6 → mdbq-1.8.8}/setup.py +1 -1
- {mdbq-1.8.6 → mdbq-1.8.8}/README.txt +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/__version__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/clean/data_clean.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/company/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/company/copysh.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/config/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/config/get_myconf.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/config/products.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/config/set_support.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/log/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/mysql/mysql.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/mysql/s_query.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/other/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/other/porxy.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/other/sku_picture.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.8.6 → mdbq-1.8.8}/setup.cfg +0 -0
@@ -709,8 +709,9 @@ class DatabaseUpdate:
|
|
709
709
|
df=df,
|
710
710
|
db_name=db_name,
|
711
711
|
table_name=collection_name,
|
712
|
+
move_insert=True, # 先删除,再插入
|
712
713
|
df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
713
|
-
drop_duplicates=
|
714
|
+
drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
714
715
|
filename=rt_filename, # 用来追踪处理进度
|
715
716
|
service_database=service_database, # 字典
|
716
717
|
)
|
@@ -876,8 +877,9 @@ class DatabaseUpdate:
|
|
876
877
|
df=df,
|
877
878
|
db_name='聚合数据',
|
878
879
|
table_name='日期表',
|
880
|
+
move_insert=True, # 先删除,再插入
|
879
881
|
df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
880
|
-
drop_duplicates=
|
882
|
+
drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
881
883
|
filename=None, # 用来追踪处理进度
|
882
884
|
service_database=service_database, # 用来追踪处理进度
|
883
885
|
)
|
@@ -907,6 +909,7 @@ class DatabaseUpdate:
|
|
907
909
|
df=df,
|
908
910
|
db_name='属性设置2',
|
909
911
|
table_name='主推商品',
|
912
|
+
move_insert=False, # 先删除,再插入
|
910
913
|
df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
911
914
|
drop_duplicates=True, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
912
915
|
filename=None, # 用来追踪处理进度
|
@@ -994,6 +997,7 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
|
|
994
997
|
d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
|
995
998
|
if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
|
996
999
|
m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
|
1000
|
+
move_insert=False, # 先删除,再插入
|
997
1001
|
df_sql = True,
|
998
1002
|
drop_duplicates=False,
|
999
1003
|
filename=name, count=f'{i}/{count}')
|
@@ -1016,7 +1020,7 @@ def one_file_to_mysql(file, db_name, table_name, target_service, database):
|
|
1016
1020
|
df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False, float_precision='high')
|
1017
1021
|
# df.replace(to_replace=[','], value='', regex=True, inplace=True) # 替换掉特殊字符
|
1018
1022
|
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
|
1019
|
-
m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename, df_sql=True, drop_duplicates=False,)
|
1023
|
+
m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename, move_insert=False, df_sql=True, drop_duplicates=False,)
|
1020
1024
|
|
1021
1025
|
|
1022
1026
|
def file_dir(one_file=True):
|
@@ -1263,6 +1263,7 @@ def data_aggregation_one(service_databases=[{}], months=1):
|
|
1263
1263
|
df=df,
|
1264
1264
|
db_name=db_name,
|
1265
1265
|
table_name=table_name,
|
1266
|
+
move_insert=False, # 先删除,再插入
|
1266
1267
|
df_sql=True,
|
1267
1268
|
drop_duplicates=False,
|
1268
1269
|
# icm_update=unique_key_list,
|
@@ -1394,6 +1395,7 @@ def data_aggregation(service_databases=[{}], months=1):
|
|
1394
1395
|
df=g.sp_index_datas,
|
1395
1396
|
db_name='属性设置2',
|
1396
1397
|
table_name='商品索引表',
|
1398
|
+
move_insert=False, # 先删除,再插入
|
1397
1399
|
# df_sql=True,
|
1398
1400
|
drop_duplicates=False,
|
1399
1401
|
icm_update=['商品id'],
|
@@ -1417,6 +1419,7 @@ def data_aggregation(service_databases=[{}], months=1):
|
|
1417
1419
|
df=df,
|
1418
1420
|
db_name=db_name,
|
1419
1421
|
table_name=table_name,
|
1422
|
+
move_insert=False, # 先删除,再插入
|
1420
1423
|
# df_sql=True,
|
1421
1424
|
drop_duplicates=False,
|
1422
1425
|
icm_update=unique_key_list,
|
@@ -32,7 +32,7 @@ class DataFrameConverter(object):
|
|
32
32
|
|
33
33
|
# dtypes = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
|
34
34
|
df.replace([np.inf, -np.inf], 0, inplace=True) # 清理一些非法值
|
35
|
-
df.replace(to_replace=['\\N', '-', '--', '', 'nan'], value=0, regex=False, inplace=True) # 替换掉特殊字符
|
35
|
+
df.replace(to_replace=['\\N', '-', '--', '', 'nan', 'NAN'], value=0, regex=False, inplace=True) # 替换掉特殊字符
|
36
36
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
37
37
|
df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
|
38
38
|
df.replace(to_replace=['"'], value='', regex=True, inplace=True)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|