mdbq 1.1.0__tar.gz → 1.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-1.1.0 → mdbq-1.1.2}/PKG-INFO +1 -1
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/aggregation.py +10 -5
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/query_data.py +1 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/config/get_myconf.py +2 -2
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mysql/mysql.py +34 -19
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.1.0 → mdbq-1.1.2}/setup.py +1 -1
- {mdbq-1.1.0 → mdbq-1.1.2}/README.txt +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/__version__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/clean/data_clean.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/company/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/company/copysh.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/config/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/config/products.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/config/set_support.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/dataframe/converter.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/log/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mysql/data_types_即将删除.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mysql/s_query.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/other/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/other/porxy.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.1.0 → mdbq-1.1.2}/setup.cfg +0 -0
{mdbq-1.1.0 → mdbq-1.1.2}/mdbq/aggregation/aggregation.py

@@ -874,12 +874,16 @@ def upload(path, db_name, collection_name):
             collection_name=collection_name,
         )
     # print(dtypes)
+    count = 0
+    for root, dirs, files in os.walk(path, topdown=False):
+        for name in files:
+            count += 1
+    i = 0  # 用来统计当前处理文件进度
     for root, dirs, files in os.walk(path, topdown=False):
         for name in files:
             if '~$' in name or '.DS' in name or '.localized' in name or 'baidu' in name:
                 continue
             if name.endswith('.csv'):
-                # print(name)
                 try:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     if len(df) == 0:
@@ -896,10 +900,11 @@ def upload(path, db_name, collection_name):
                     df = df.astype(dtypes)

                     # d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
-                    m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
+                    m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, filename=name, count=f'{i}/{count}')
                     # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
                 except Exception as e:
                     print(name, e)
+            i += 1
     if d.client:
         d.client.close()  # 必须手动关闭数据库连接

@@ -920,7 +925,7 @@ if __name__ == '__main__':
     # username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     # print(username, password, host, port)
     upload(
-        path='/Users/xigua/数据中心/原始文件2
-        db_name = '
-        collection_name = '
+        path='/Users/xigua/数据中心/原始文件2/生意经/宝贝指标',
+        db_name = '生意经2',
+        collection_name = '宝贝指标',
     )
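For reference, a minimal standalone sketch of the progress-counting pattern introduced above: one os.walk pass counts the files, a second pass processes them and reports `i/count`. The directory path and the processing step are placeholders, not the package's code.

    import os

    path = '/some/folder'                      # placeholder directory
    count = 0
    for root, dirs, files in os.walk(path, topdown=False):
        count += len(files)                    # total number of files to process

    i = 0                                      # index of the file currently being handled
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            print(f'processing {i}/{count}: {name}')   # e.g. passed on as count=f'{i}/{count}'
            i += 1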
{mdbq-1.1.0 → mdbq-1.1.2}/mdbq/config/get_myconf.py

@@ -62,7 +62,7 @@ def select_config_values(target_service, database, path=None):
     if socket.gethostname() == 'xigua_lx':
         # 本机自身运行使用 127.0.0.1
         options = ['username_mysql_lx_nw', 'password_mysql_lx_nw', 'host_bd', 'port_mysql_lx_nw',]
-    elif socket.gethostname() == 'xigua1' or socket.gethostname() == '
+    elif socket.gethostname() == 'xigua1' or socket.gethostname() == 'MacBook-Pro':
         # 内网地址: 正在运行的是 家里笔记本或者台式机,或者 macbook pro
         options = ['username_mysql_lx_nw', 'password_mysql_lx_nw', 'host_mysql_lx_nw', 'port_mysql_lx_nw',]
     else:
@@ -126,6 +126,6 @@ def main():

 if __name__ == '__main__':
     # main()
-    r, d, s, g = select_config_values(target_service='home_lx', database='
+    r, d, s, g = select_config_values(target_service='home_lx', database='mysql')
     print(r, d, s, g, type(r), type(d), type(s), type(g))
     print(f'本机: {platform.system()} // {socket.gethostname()}')
{mdbq-1.1.0 → mdbq-1.1.2}/mdbq/mysql/mysql.py

@@ -35,14 +35,16 @@ class MysqlUpload:
            'charset': charset,  # utf8mb4 支持存储四字节的UTF-8字符集
            'cursorclass': pymysql.cursors.DictCursor,
        }
+        self.filename = None

-    def df_to_mysql(self, df, table_name, db_name='远程数据源', drop_duplicates=False):
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', drop_duplicates=False, filename=None, count=None):
        """
        将 df 写入数据库
        db_name: 数据库名称
        table_name: 集合/表名称
        drop_duplicates: 仅限于聚合数据使用,其他情况不要设置
        """
+        self.filename = filename
        if isinstance(df, pd.DataFrame):
            if len(df) == 0:
                print(f'{db_name}: {table_name} 传入的 df 数据长度为0')
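The new filename and count parameters only feed the progress and error messages printed while writing. A hedged usage sketch, assuming the class is importable as mdbq.mysql.mysql.MysqlUpload (module path inferred from the file list above); the credentials, the DataFrame, and the progress string are placeholders:

    import pandas as pd
    from mdbq.mysql.mysql import MysqlUpload  # import path assumed from the file listing

    df = pd.DataFrame({'sku': ['A001'], 'sales': [12.5]})  # placeholder data
    m = MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)  # placeholder credentials
    m.df_to_mysql(
        df=df,
        db_name='生意经2',        # database name used in the diff's own example
        table_name='宝贝指标',     # table name used in the diff's own example
        filename='example.csv',   # echoed in log and error messages
        count='1/10',             # progress string, e.g. f'{i}/{count}'
    )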
@@ -95,7 +97,7 @@ class MysqlUpload:
                for col in col_not_exist:
                    try:
                        # 创建列,需转义
-                        sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]}
+                        sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
                        cursor.execute(sql)
                        print(f"添加列: {col}({dtypes[col]})")  # 添加列并指定数据类型

@@ -105,8 +107,8 @@ class MysqlUpload:
                        result = cursor.fetchone()  # 检查索引是否存在
                        if not result:
                            cursor.execute(f"CREATE INDEX index_name ON `{table_name}`(`{col}`)")
-                    except:
-
+                    except Exception as e:
+                        print(f'{self.filename}: {e}')
                connection.commit()  # 提交事务

            # 返回这些结果的目的是等添加完列再写 json 文件才能读到 types 信息
@@ -128,7 +130,7 @@ class MysqlUpload:

                # 5. 更新插入数据
                now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
-                print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}')
+                print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
                datas = df.to_dict(orient='records')
                for data in datas:
                    try:
@@ -143,11 +145,14 @@ class MysqlUpload:

                        sql = f"SELECT {cols} FROM `{table_name}` WHERE {condition}"
                        cursor.execute(sql)
-                        result = cursor.fetchall()  # 获取查询结果,
+                        result = cursor.fetchall()  # 获取查询结果, 有结果返回 list 表示数据已存在(不重复插入),没有则返回空 tuple
                        if not result:  # 数据不存在则插入
                            sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
                            cursor.execute(sql)
+                        else:
+                            print(f'重复数据不插入: {condition}')
                    except Exception as e:
+                        print(f'{self.filename}:')
                        print(f'mysql -> df_to_mysql 报错: {e}')
                connection.commit()  # 提交事务

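The hunk above skips the INSERT whenever the SELECT already returns a matching row. A simplified sketch of that "select before insert" check; it is not the package's code, and it swaps the string-built WHERE clause for pymysql parameterized queries:

    import pymysql

    def insert_if_absent(connection, table_name, data):
        """Insert the row `data` (a dict of column -> value) only if an identical row is absent."""
        cols = ', '.join(f'`{k}`' for k in data)
        placeholders = ', '.join(['%s'] * len(data))
        condition = ' AND '.join(f'`{k}` = %s' for k in data)
        values = list(data.values())
        with connection.cursor() as cursor:
            cursor.execute(f"SELECT {cols} FROM `{table_name}` WHERE {condition}", values)
            if cursor.fetchall():  # non-empty result: the row already exists, skip the insert
                print(f'duplicate row, not inserted: {data}')
                return False
            cursor.execute(f"INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders})", values)
        connection.commit()
        return True

    # Example call (placeholder credentials and schema):
    # conn = pymysql.connect(host='127.0.0.1', user='user', password='pass', database='生意经2', charset='utf8mb4')
    # insert_if_absent(conn, '宝贝指标', {'日期': '2024-08-22', '销售额': 12.5})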
@@ -183,6 +188,18 @@ class MysqlUpload:

    def convert_dtype_to_sql(self, df, col, dtype):
        """ 按照以下规则转换DataFrame列的数据类型为 MYSQL 专有的数据类型 """
+
+        def find_longest_decimal_value(number_list):
+            # 取列表中小数位数最长的值
+            longest_value = None
+            max_decimals = 0
+            for num in number_list:
+                decimal_places = len(str(num).split('.')[1])
+                if decimal_places > max_decimals:
+                    max_decimals = decimal_places
+                    longest_value = num
+            return longest_value
+
        # 最优先处理 ID 类型, 在 mysql 里面, 有些列数字过长不能存储为 int 类型
        if 'id' in col or 'ID' in col or 'Id' in col or '摘要' in col or '商家编码' in col or '单号' in col or '款号' in col:
            return 'mediumtext'
@@ -207,9 +224,9 @@ class MysqlUpload:
                return 'mediumtext'
            return 'INT'
        elif dtype == 'float64':
-
-            int_step = len(str(
-            f_step = len(str(
+            res = find_longest_decimal_value(df[col].tolist())  # 取小数位数最长的值
+            int_step = len(str(res).split('.')[0])  # 整数位数长度
+            f_step = len(str(res).split('.')[1])  # 小数位数长度
            if int_step >= 12:
                return 'mediumtext'  # mysql 中不要使用 float 和 double 类型,会影响计算结果
            elif int_step >= 8 and f_step >= 0:
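A worked example of the new float64 handling: the helper below is copied from the hunk above, and it assumes str(num) always contains a decimal point, which holds for ordinary floats. The sample values are placeholders.

    def find_longest_decimal_value(number_list):
        # return the value with the most digits after the decimal point
        longest_value = None
        max_decimals = 0
        for num in number_list:
            decimal_places = len(str(num).split('.')[1])
            if decimal_places > max_decimals:
                max_decimals = decimal_places
                longest_value = num
        return longest_value

    values = [12.5, 3.14159, 807.25]
    res = find_longest_decimal_value(values)   # -> 3.14159 (five decimal places)
    int_step = len(str(res).split('.')[0])     # -> 1 digit before the point
    f_step = len(str(res).split('.')[1])       # -> 5 digits after the point
    print(res, int_step, f_step)               # 3.14159 1 5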
@@ -679,24 +696,22 @@ def download_datas(table_name, save_path, start_date):

 def one_file_to_mysql(file, db_name, table_name, target_service, database):
     username, password, host, port = get_myconf.select_config_values(target_service=target_service, database=database)
-
+    filename = os.path.basename(file)
     df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
     m = MysqlUpload(username=username, password=password, host=host, port=port)
-    m.df_to_mysql(df=df, db_name=db_name, table_name=table_name)
+    m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename)


 if __name__ == '__main__':
-    username, password, host, port = get_myconf.select_config_values(target_service='
-    print(username, password, host, port)
+    # username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
+    # print(username, password, host, port)

-    file = '/Users/xigua/
+    file = '/Users/xigua/数据中心/原始文件2/生意经/宝贝指标/2024-08/baobeitrans-2024-08-22.csv'
     one_file_to_mysql(
         file=file,
-        db_name='
-        table_name='
-        target_service='
+        db_name='生意经2',
+        table_name='宝贝指标',
+        target_service='home_lx',
         database='mysql'
     )

-
-