mdbq 1.4.5__tar.gz → 1.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-1.4.5 → mdbq-1.4.6}/PKG-INFO +1 -1
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/aggregation/aggregation.py +9 -9
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/aggregation/query_data.py +6 -6
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/mysql/mysql.py +32 -50
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.4.5 → mdbq-1.4.6}/setup.py +1 -1
- {mdbq-1.4.5 → mdbq-1.4.6}/README.txt +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/__version__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/clean/data_clean.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/company/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/company/copysh.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/config/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/config/get_myconf.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/config/products.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/config/set_support.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/dataframe/converter.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/log/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/mysql/s_query.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/other/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/other/porxy.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.4.5 → mdbq-1.4.6}/setup.cfg +0 -0
mdbq/aggregation/aggregation.py

@@ -660,7 +660,7 @@ class DatabaseUpdate:
     db_name=db_name,
     table_name=collection_name,
     df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-
+    drop_duplicates=True, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
     filename=rt_filename, # 用来追踪处理进度
     system_name=system_name, # 用来追踪处理进度
 )
@@ -824,10 +824,10 @@ class DatabaseUpdate:
 )
 m.df_to_mysql(
     df=df,
-    db_name=
-    table_name=
+    db_name='聚合数据',
+    table_name='日期表',
     df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-
+    drop_duplicates=True, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
     filename=None, # 用来追踪处理进度
     system_name=service_name, # 用来追踪处理进度
 )
@@ -912,10 +912,10 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
 
 if dbs['mongodb']:
     d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
-if dbs['mysql']: #
+if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
     m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
-
-    # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
+                  drop_duplicates=True, filename=name, count=f'{i}/{count}')
+    # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
 except Exception as e:
     print(name, e)
 i += 1
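The hunk above sits inside upload_dir, which walks a directory and pushes each file into MongoDB and/or MySQL; in 1.4.6 the MySQL branch now passes drop_duplicates=True together with the file name and an i/count progress string. A minimal driver sketch, assuming the module path from the file list above; every literal value is a placeholder, not taken from the diff:

```python
# Hypothetical call into upload_dir; paths and names below are placeholders.
from mdbq.aggregation import aggregation

aggregation.upload_dir(
    path='/data/exports/shop_reports',      # directory of files to upload (placeholder)
    db_name='test',                          # target database (placeholder)
    collection_name='demo_table',            # target table/collection (placeholder)
    dbs={'mysql': True, 'mongodb': False},   # only the MySQL branch, which now deduplicates on insert
)
```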
@@ -933,7 +933,7 @@ def one_file_to_mysql(file, db_name, table_name, target_service, database):
 filename = os.path.basename(file)
 df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False, float_precision='high')
 m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
-m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename, df_sql=True)
+m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename, df_sql=True, drop_duplicates=False,)
 
 
 def file_dir(one_file=True):
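one_file_to_mysql reads a single CSV with utf-8_sig encoding and hands it to MysqlUpload.df_to_mysql; the 1.4.6 change only makes drop_duplicates=False explicit alongside df_sql=True. A hedged sketch of the same upload written out directly, assuming credentials come from get_myconf as elsewhere in this diff; the path, service name and table names are placeholders:

```python
import os
import pandas as pd
from mdbq.config import get_myconf
from mdbq.mysql import mysql

file = '/path/to/export.csv'  # placeholder CSV path
username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False, float_precision='high')
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
m.df_to_mysql(
    df=df,
    db_name='test',                    # placeholder database
    table_name='demo_table',           # placeholder table
    filename=os.path.basename(file),   # used to trace errors back to the source file
    df_sql=True,                       # bulk upload via df.to_sql, no dedup pass
    drop_duplicates=False,             # explicit in 1.4.6, matching the new keyword
)
```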
@@ -1037,7 +1037,7 @@ if __name__ == '__main__':
 # username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
 # m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
 # m.df_to_mysql(df=df, db_name='test', table_name='增量更新测试',
-#
+# drop_duplicates=True,
 # # icm_update=['日期', '推广费余额'],
 # system_name='company',
 # )
mdbq/aggregation/query_data.py

@@ -526,7 +526,7 @@ def data_aggregation_one(service_databases=[{}], months=1, system_name=None,):
     df=df,
     db_name=db_name,
     table_name=table_name,
-
+    drop_duplicates=False,
     icm_update=unique_key_list,
     system_name=system_name,
     service_databases=service_databases,
@@ -596,7 +596,7 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
     df=df,
     db_name=db_name,
     table_name=table_name,
-
+    drop_duplicates=False,
     icm_update=unique_key_list,
     system_name=system_name,
     service_databases=service_databases,
@@ -606,7 +606,7 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
     df=res,
     db_name='聚合数据',
     table_name='_全店商品销售',
-
+    drop_duplicates=False,
     icm_update=['日期', '商品id'], # 设置唯一主键
     system_name = system_name,
     service_databases=service_databases,
@@ -616,7 +616,7 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
     df=res,
     db_name='聚合数据',
     table_name='_推广商品销售',
-
+    drop_duplicates=False,
     icm_update=['日期', '商品id'], # 设置唯一主键
     system_name=system_name,
     service_databases=service_databases,
@@ -627,6 +627,6 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
 
 
 if __name__ == '__main__':
-
-    data_aggregation_one(service_databases=[{'company': 'mysql'}], months=10, system_name='company')
+    data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1, system_name='home_lx')
+    # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=10, system_name='company')
     # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
mdbq/mysql/mysql.py

@@ -57,17 +57,19 @@ class MysqlUpload:
         }
         self.filename = None
 
-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[],
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_databases=[{'home_lx': 'mysql'}], df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
         """
         将 df 写入数据库
         db_name: 数据库名称
         table_name: 集合/表名称
-        df_sql: 这是一个临时参数, 值为 True 时使用 df.to_sql 函数上传整个表,
-        drop_duplicates
-
-        filename: 传这个参数是方便定位产生错误的文件
-        icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_dup 改为 False
+        df_sql: 这是一个临时参数, 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重,初创表大量上传数据的时候使用
+        drop_duplicates: 值为 True 时检查重复数据再插入,反之直接上传,数据量大时会比较慢
+        icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_duplicates 改为 False
         使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
+        filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
+        system_name: 同样是用来追踪处理进度
+        service_databases: 这个参数是用来设置更新哪台服务器的 types 信息到本地 json 文件
+        json_path: 这个参数同样也是是用来设置更新 json 文件
         """
         self.filename = filename
         if isinstance(df, pd.DataFrame):
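With the widened signature, every behaviour switch is now a keyword argument: df_sql for bulk initial loads, drop_duplicates for a check-before-insert pass, icm_update for incremental updates keyed on unique columns, and service_databases/json_path/system_name for dtype and progress bookkeeping. A hedged usage sketch; the credentials, the sample DataFrame columns and the plain-upload table name are placeholders, while the aggregated-table call mirrors the query_data.py hunks above:

```python
import pandas as pd
from mdbq.mysql.mysql import MysqlUpload

df = pd.DataFrame({'日期': ['2024-01-01'], '商品id': [1001], '销售额': [88.5]})  # illustrative frame
m = MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)  # placeholder credentials

# Plain upload with a duplicate check before each insert (slower on large frames):
m.df_to_mysql(df=df, db_name='test', table_name='demo_table',
              drop_duplicates=True, filename='demo.csv')

# Incremental update keyed on unique columns; drop_duplicates must stay False:
m.df_to_mysql(df=df, db_name='聚合数据', table_name='_全店商品销售',
              drop_duplicates=False, icm_update=['日期', '商品id'],
              service_databases=[{'home_lx': 'mysql'}])
```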
@@ -81,19 +83,6 @@ class MysqlUpload:
         cv = converter.DataFrameConverter()
         df = cv.convert_df_cols(df=df) # 清理 dataframe 非法值
 
-        # if df_sql:
-        # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
-        # print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
-        # engine = create_engine(f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
-        # df.to_sql(
-        # name=table_name,
-        # con=engine,
-        # if_exists='append',
-        # index=False,
-        # chunksize=1000,
-        # dtype={'京东价': 'INT'},
-        # )
-        # return
         connection = pymysql.connect(**self.config) # 连接数据库
         with connection.cursor() as cursor:
             cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
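The comment block deleted above was a prototype of the df_sql=True path: build a SQLAlchemy engine over pymysql and append the whole DataFrame with DataFrame.to_sql. A standalone sketch of that approach, with placeholder connection details and without the dtype override from the removed comments:

```python
import pandas as pd
from sqlalchemy import create_engine

# Placeholder connection details.
username, password, host, port, db_name = 'user', 'pass', '127.0.0.1', 3306, 'test'
engine = create_engine(f"mysql+pymysql://{username}:{password}@{host}:{port}/{db_name}")

df = pd.DataFrame({'sku': ['a', 'b'], '京东价': [10, 20]})
df.to_sql(
    name='demo_table',    # target table (placeholder)
    con=engine,
    if_exists='append',   # append rows; the table is created if it does not exist
    index=False,          # do not write the DataFrame index as a column
    chunksize=1000,       # insert in batches of 1000 rows
)
```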
@@ -173,14 +162,14 @@ class MysqlUpload:
             elif cl:
                 mysql_types.mysql_all_dtypes(service_databases=service_databases, system_name=system_name) # 更新所有数据库所有数据表的 dtypes 信息到本地 json
 
-            # 4. 移除指定日期范围内的数据,仅限于聚合数据使用,其他情况不要设置
-            if drop_duplicates and '日期' in df.columns.tolist():
-
-
-
-
-
-
+            # # 4. 移除指定日期范围内的数据,仅限于聚合数据使用,其他情况不要设置
+            # if drop_duplicates and '日期' in df.columns.tolist():
+            # dates = df['日期'].values.tolist()
+            # start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
+            # end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+            # sql = f"DELETE FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
+            # cursor.execute(sql)
+            # connection.commit()
 
             # 5. 更新插入数据
             now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
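Step 4 is now fully commented out, but the comments keep the logic: for aggregated tables only, delete every existing row whose 日期 falls inside the incoming frame's date range before re-inserting. A hedged reconstruction as a standalone helper, written with a parameterized query rather than the original string formatting, and assuming cursor and connection come from pymysql as in the surrounding method:

```python
import datetime
import pandas as pd

def delete_date_range(df: pd.DataFrame, cursor, connection, table_name: str) -> None:
    """Remove rows in the incoming frame's 日期 range; intended for aggregated tables only."""
    dates = df['日期'].values.tolist()
    start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
    end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    # Delete existing rows whose 日期 falls between the frame's earliest and latest dates.
    sql = f"DELETE FROM `{table_name}` WHERE `日期` BETWEEN %s AND %s"
    cursor.execute(sql, (start_date, end_date))
    connection.commit()
```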
@@ -200,7 +189,7 @@ class MysqlUpload:
     condition = ' AND '.join(condition) # 构建查询条件
     # print(condition)
 
-    if
+    if drop_duplicates: # 查重插入
         sql = "SELECT %s FROM %s WHERE %s" % (cols, table_name, condition)
         # sql = f"SELECT {cols} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
         # print(sql)
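When drop_duplicates is set, the method builds a WHERE clause from the row's own column/value pairs, queries for an identical row, and only inserts when nothing comes back. A simplified sketch of that check-before-insert step; quoting and escaping are reduced to the bare minimum, and data is one row as a dict, as in the method:

```python
def insert_if_absent(cursor, table_name: str, data: dict) -> None:
    cols = ', '.join(f'`{k}`' for k in data)                          # `col1`, `col2`, ...
    values = ', '.join(f"'{v}'" for v in data.values())               # 'v1', 'v2', ...
    condition = ' AND '.join(f"`{k}` = '{v}'" for k, v in data.items())
    cursor.execute(f"SELECT {cols} FROM `{table_name}` WHERE {condition}")
    if cursor.fetchone() is None:   # no identical row found, safe to insert
        cursor.execute(f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});")
```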
@@ -211,7 +200,7 @@ class MysqlUpload:
         cursor.execute(sql)
     # else:
     # print(f'重复数据不插入: {condition[:50]}...')
-    elif icm_update: #
+    elif icm_update: # 增量更新, 专门用于聚合数据,其他库不要调用
         """ 使用增量更新: 需确保 icm_update['主键'] 传进来的列必须是数据表中唯一主键,值不会发生变化且不会重复,否则可能产生覆盖情况 """
         sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
         cursor.execute(sql, (db_name, {table_name}))
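The icm_update branch starts by reading the target table's column list from information_schema, so the non-key columns can later be compared and selectively updated; the comment block removed further down shows the same lookup. A hedged sketch of that first step, assuming a pymysql DictCursor, which the COLUMN_NAME key access in the method suggests:

```python
def existing_columns(cursor, db_name: str, table_name: str) -> list:
    """Return the table's column names, excluding the auto-increment id column."""
    sql = ('SELECT COLUMN_NAME FROM information_schema.columns '
           'WHERE table_schema = %s AND table_name = %s')
    cursor.execute(sql, (db_name, table_name))
    cols_exist = [row['COLUMN_NAME'] for row in cursor.fetchall()]
    return [col for col in cols_exist if col != 'id']
```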
@@ -261,26 +250,6 @@ class MysqlUpload:
     else: # 没有数据返回,则直接插入数据
         sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
         cursor.execute(sql)
-    # elif icm_up:
-    # sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
-    # cursor.execute(sql, (db_name, {table_name}))
-    # columns = cursor.fetchall()
-    # cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
-    # cols_exist = [item for item in cols_exist if item != 'id']
-    # update_col = [item for item in cols_exist if item not in icm_up] # 除了主键外的其他列
-    #
-    # unique_keys = ', '.join([f"`{item}`" for item in cols_exist])
-    # unique_keys_values = ', '.join([f"'{data[item]}'" for item in cols_exist])
-    #
-    # change_values = []
-    # for col in update_col:
-    # change_values += [f"`{col}` = '{str(data[col])}'"]
-    # change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
-    # # print(change_values)
-    # sql = f"INSERT INTO `{table_name}` ({unique_keys}) VALUES ({unique_keys_values}) ON DUPLICATE KEY UPDATE {change_values};"
-    # print(sql)
-    # # cursor.execute(sql)
-
 else:
     sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
     cursor.execute(sql)
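The comment block deleted here described an alternative upsert: build one INSERT ... ON DUPLICATE KEY UPDATE statement that rewrites every non-key column when the unique key already exists. A hedged sketch of that statement construction; quoting is simplified, data is one row as a dict, and icm_keys are the unique-key columns:

```python
def upsert_sql(table_name: str, data: dict, icm_keys: list) -> str:
    cols = ', '.join(f'`{k}`' for k in data)
    values = ', '.join(f"'{v}'" for v in data.values())
    update_cols = [k for k in data if k not in icm_keys]             # every non-key column
    change_values = ', '.join(f"`{k}` = '{data[k]}'" for k in update_cols)
    return (f"INSERT INTO `{table_name}` ({cols}) VALUES ({values}) "
            f"ON DUPLICATE KEY UPDATE {change_values};")

# Example: upsert_sql('demo_table', {'日期': '2024-01-01', '商品id': 1001, '销售额': 88.5}, ['日期', '商品id'])
```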
@@ -834,4 +803,17 @@ def download_datas_bak(table_name, save_path, start_date):
 
 if __name__ == '__main__':
     username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
-    print(username, password, host, port)
+    # print(username, password, host, port)
+    path = '/Users/xigua/Downloads/人群洞察.csv'
+    df = pd.read_csv(path, encoding='utf-8_sig', header=0, na_filter=False)
+    # print(df)
+    m = MysqlUpload(username=username, password=password, host=host, port=port)
+    m.df_to_mysql(
+        df=df,
+        db_name='test',
+        table_name='测试数据',
+        drop_duplicates=True,
+        # system_name=system_name,
+        # service_databases=service_databases,
+    )
+