mdbq 0.1.3__tar.gz → 0.1.5__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {mdbq-0.1.3 → mdbq-0.1.5}/PKG-INFO +1 -1
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/dataframe/converter.py +8 -2
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/mysql/mysql.py +2 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-0.1.3 → mdbq-0.1.5}/setup.py +1 -1
- {mdbq-0.1.3 → mdbq-0.1.5}/README.txt +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/__version__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/aggregation/aggregation.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/bdup/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/bdup/bdup.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/clean/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/clean/data_clean.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/company/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/company/copysh.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/config/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/config/get_myconf.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/config/update_conf.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/log/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/log/mylogger.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/mongo/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/mongo/mongo.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/mysql/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/mysql/s_query.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/other/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/other/porxy.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/other/pov_city.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/other/ua_sj.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/pbix/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq/spider/__init__.py +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-0.1.3 → mdbq-0.1.5}/setup.cfg +0 -0
@@ -24,6 +24,7 @@ class DataFrameConverter(object):
|
|
24
24
|
df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
|
25
25
|
df.replace(to_replace=['"'], value='', regex=True, inplace=True)
|
26
26
|
cols = df.columns.tolist()
|
27
|
+
|
27
28
|
for col in cols:
|
28
29
|
# df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
|
29
30
|
# 百分比在某些数据库中不兼容, 转换百分比为小数
|
@@ -31,9 +32,14 @@ class DataFrameConverter(object):
|
|
31
32
|
# 尝试转换合适的数据类型
|
32
33
|
if df[col].dtype == 'object':
|
33
34
|
try:
|
34
|
-
df[col] = df[col].astype(int) # 尝试转换 int
|
35
|
+
# df[col] = df[col].astype(int) # 尝试转换 int
|
36
|
+
df[col] = df[col].apply(lambda x: int(x) if '_' not in str(x) else x)
|
35
37
|
except:
|
36
|
-
df[col] = df[col].astype('float64', errors='ignore') # 尝试转换 float, 报错则忽略
|
38
|
+
# df[col] = df[col].astype('float64', errors='ignore') # 尝试转换 float, 报错则忽略
|
39
|
+
try:
|
40
|
+
df[col] = df[col].apply(lambda x: float(x) if '_' not in str(x) else x)
|
41
|
+
except:
|
42
|
+
pass
|
37
43
|
if df[col].dtype == 'float': # 对于小数类型, 保留 6 位小数
|
38
44
|
df[col] = df[col].apply(lambda x: round(float(x), 6) if x != 0 else x)
|
39
45
|
# 清理列名, 在 mysql 里面列名不能含有某些特殊字符
|
@@ -181,6 +181,8 @@ class MysqlUpload:
|
|
181
181
|
# 最优先处理 ID 类型, 在 mysql 里面, 有些列数字过长不能存储为 int 类型
|
182
182
|
if 'id' in col or 'ID' in col or 'Id' in col or '摘要' in col or '商家编码' in col or '单号' in col or '款号' in col:
|
183
183
|
return 'mediumtext'
|
184
|
+
if '文件大小' in col: # bw 程序
|
185
|
+
return 'mediumtext'
|
184
186
|
elif '日期' in col or '时间' in col:
|
185
187
|
try:
|
186
188
|
k = pd.to_datetime(df[col].tolist()[0]) # 检查是否可以转为日期
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|