mdbq 2.8.9__py3-none-any.whl → 2.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -128,6 +128,11 @@ class DataClean:
128
128
  '数据库名': '生意参谋3',
129
129
  '集合名称': '直播分场次效果',
130
130
  },
131
+ {
132
+ '文件简称': 'crm_客户列表_', # 文件名中包含的字符
133
+ '数据库名': '生意参谋3',
134
+ '集合名称': 'crm成交客户',
135
+ },
131
136
  ]
132
137
  for root, dirs, files in os.walk(path, topdown=False):
133
138
  for name in files:
@@ -180,6 +185,8 @@ class DataClean:
180
185
  # new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
181
186
  # self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
182
187
  # os.remove(os.path.join(root, name))
188
+ elif name.endswith('.csv') and 'crm_客户列表' in name:
189
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
183
190
 
184
191
  # 将数据传入 self.datas 等待更新进数据库
185
192
  if not db_name or not collection_name:
@@ -209,6 +216,11 @@ class DataClean:
209
216
  '数据库名': '达摩盘3',
210
217
  '集合名称': 'dmp人群报表',
211
218
  },
219
+ {
220
+ '文件简称': '货品洞察_全店单品', # 文件名中包含的字符
221
+ '数据库名': '达摩盘3',
222
+ '集合名称': '货品洞察_全店单品',
223
+ },
212
224
  ]
213
225
  for root, dirs, files in os.walk(path, topdown=False):
214
226
  for name in files:
@@ -239,6 +251,8 @@ class DataClean:
239
251
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
240
252
  elif name.endswith('.csv') and 'dmp人群报表_' in name:
241
253
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
254
+ elif name.endswith('.csv') and '货品洞察_全店单品' in name:
255
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
242
256
 
243
257
  # 将数据传入 self.datas 等待更新进数据库
244
258
  if not db_name or not collection_name:
@@ -831,6 +845,9 @@ class DataClean:
831
845
  elif name.endswith('.csv') and 'dmp人群报表_' in name:
832
846
  t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
833
847
  bib(t_path, _as_month=True)
848
+ elif name.endswith('.csv') and '货品洞察_全店单品' in name:
849
+ t_path = os.path.join(self.source_path, '达摩盘', '货品洞察')
850
+ bib(t_path, _as_month=True)
834
851
 
835
852
  # @try_except
836
853
  def move_sjy(self, path=None, is_except=[]):
@@ -1283,7 +1300,11 @@ def main(is_mysql=False, is_company=False):
1283
1300
  cn.sp_scene_clean(is_except=['except']) # 商品素材
1284
1301
  cn.upload_df() # 上传数据库
1285
1302
 
1286
- if not is_company:
1303
+ if is_company: # 公司移除所有文件
1304
+ files = os.listdir(upload_path)
1305
+ for file in files:
1306
+ os.remove(os.path.join(upload_path, file))
1307
+ else: # 其他主机则进行文件分类
1287
1308
  cn.move_sycm(is_except=['临时文件', ]) # 生意参谋,移到文件到原始文件夹
1288
1309
  cn.move_dmp(is_except=['临时文件', ]) # 达摩盘
1289
1310
  cn.move_sjy(is_except=['临时文件',]) # 生意经,移到文件到原始文件夹
mdbq/company/copysh.py CHANGED
@@ -304,7 +304,7 @@ class TbFiles:
304
304
  def refresh_excel(self):
305
305
  # 刷新共享位置的指定文件/文件夹
306
306
  if platform.system() == 'Windows' and socket.gethostname() == 'company':
307
- excel_path = os.path.join(self.share_path, 'EXCEL报表')
307
+ excel_path = os.path.join(Share_Path, 'EXCEL报表')
308
308
  files = os.listdir(excel_path)
309
309
  files = [f'{excel_path}\\{item}' for item in files if item.endswith('.xlsx') or item.endswith('.xls')]
310
310
  r = refresh_all.RefreshAll()
@@ -52,7 +52,9 @@ class DataFrameConverter(object):
52
52
  except Exception as e:
53
53
  print(f'留意错误信息: 位于列 -> {col} -> {e}')
54
54
 
55
- if col.endswith('占比') or col.endswith('率'):
55
+ if (col.endswith('占比') or col.endswith('率') or col.endswith('同比')
56
+ or col.endswith('环比') or col.lower().endswith('roi')
57
+ or col.endswith('产出比')):
56
58
  df = df.astype({col: float}, errors='raise')
57
59
 
58
60
  # 尝试转换合适的数据类型
mdbq/mysql/mysql.py CHANGED
@@ -41,6 +41,63 @@ warnings.filterwarnings('ignore')
41
41
  """
42
42
 
43
43
 
44
def is_valid_date(date_string):
    """Classify a value as a plain date, a date-with-time, or neither.

    The value is converted with ``str()`` exactly once, so non-string inputs
    (ints, ``datetime`` objects, ...) are classified instead of raising
    ``TypeError`` in the time check.

    Args:
        date_string: Value to inspect; anything accepted by ``str()``.

    Returns:
        1 when the text is a ``YYYY-M-D`` date (leading zeros optional),
        2 when the text ends with an ``H:M:S`` time component
        (e.g. ``2024-11-09 00:36:45``), otherwise ``None``.
    """
    text = str(date_string)

    # Pure date, e.g. 2024-11-09 or 2024-1-9.
    if re.match(r"^(\d{4})-(0?[1-9]|1[0-2])-(0?[1-9]|[12]\d|3[01])$", text):
        return 1

    # Date + time, e.g. 2024-11-09 00:36:45. Only the trailing time part is
    # verified; the prefix is deliberately left unconstrained, as before.
    if re.match(r".*\d+:\d+:\d+$", text):
        return 2

    return None
55
+
56
+
57
def is_integer(int_str):
    """Tell whether a value's text form denotes an integer that fits an INT column.

    Accepted forms:
      * plain digits with an optional sign, at most 9 digits;
      * digits grouped with thousands separators (``1,234,567``);
      * scientific notation with a decimal point (``1.5e3``) when the
        exponent shifts every fractional digit away and the expanded
        number stays within 10 digits.

    A negative exponent (``1.5e-3``) is never treated as an integer: the
    exponent sign used to be ignored, which misclassified such values.

    Args:
        int_str: Value to inspect; it is converted with ``str()``.

    Returns:
        ``True`` if the text is an acceptable integer, ``False`` otherwise.
    """
    text = str(int_str)

    sci = re.match(r'^[-+]?(\d+)\.(\d+)[eE]([-+]?)(\d+)$', text)
    if sci:
        int_part, frac_part, exp_sign, exp_digits = sci.groups()
        exponent = int(exp_digits)
        if exp_sign == '-' and exponent != 0:
            # The decimal point moves left, so the value gains decimals.
            return False
        if int(int_part) == 0:
            if exponent > 10:  # expanded integer would exceed 10 digits
                return False
        elif len(int_part) + exponent > 10:  # expanded integer would exceed 10 digits
            return False
        # Integer only if the exponent consumes every fractional digit.
        return exponent >= len(frac_part)

    # Plain number, thousands separators allowed.
    plain_pattern = r'^[-+]?\d{1,3}(,\d{3}){0,3}$|^[-+]?\d{1,9}$'
    return re.match(plain_pattern, text) is not None
76
+
77
+
78
def count_decimal_places(num_str):
    """Count the integer and fractional digits of a decimal number.

    Plain decimals (``12.345``) and scientific notation (``1.25e1``) are
    supported. The exponent sign is honoured, so ``1.5e-3`` (0.0015) reports
    four decimal places instead of being treated as "not a decimal".

    Args:
        num_str: Value to inspect; it is converted with ``str()``.

    Returns:
        A ``(count_int, count_float)`` tuple. Leading zeros of the integer
        part are not counted. ``(0, 0)`` is returned when the text is not a
        number, has no fractional digits, or when a non-negative exponent
        removes every fractional digit.
    """
    parsed = re.match(r'^[-+]?(\d+)(?:\.(\d+))?(?:[eE]([-+]?\d+))?$', str(num_str))
    if parsed is None:
        return 0, 0

    int_part, frac_part, exponent = parsed.groups()
    shift = int(exponent) if exponent is not None else 0

    if frac_part is None:
        if shift >= 0:
            # No decimal point and nothing creating decimals, e.g. "123" / "1e5".
            return 0, 0
        frac_part = ''  # e.g. "5e-3": decimals come from the exponent alone

    # Leading zeros of the integer part carry no digits.
    int_digits = len(int_part.lstrip('0'))

    if shift < 0:
        # The point moves left: integer digits are lost, decimals are gained.
        return max(int_digits + shift, 0), len(frac_part) - shift
    if exponent is None:
        return int_digits, len(frac_part)
    if shift < len(frac_part):
        # The point moves right but some fractional digits remain.
        return int_digits + shift, len(frac_part) - shift
    return 0, 0
99
+
100
+
44
101
  class MysqlUpload:
45
102
  def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
46
103
  self.username = username
@@ -68,6 +125,127 @@ class MysqlUpload:
68
125
 
69
126
  return wrapper
70
127
 
128
def dict_to_mysql(self, db_name, table_name, dict_data, main_key=None, unique_main_key=None, index_length=100):
    """Insert one dict as a row, creating the database/table/columns on demand.

    The row is written with ``INSERT ... ON DUPLICATE KEY UPDATE``, so an
    existing row that collides on a unique index is updated in place.

    Args:
        db_name: Target database; created when it does not exist.
        table_name: Target table; created with an auto-increment ``id``
            primary key when it does not exist.
        dict_data: Mapping of column name -> value. The caller's dict is not
            modified; a shallow copy is used internally.
        main_key: Column names that get a regular index when the column is
            first created.
        unique_main_key: Column names that get a unique index when the
            column is first created.
        index_length: Prefix length used for indexes on text columns.

    Returns:
        None.
    """
    if not dict_data:
        # An empty dict would produce an invalid INSERT statement.
        print('mysql.py -> MysqlUpload -> dict_to_mysql -> 传入的字典不能为空')
        return

    main_key = main_key or []
    unique_main_key = unique_main_key or []
    # Work on a copy: cover_dict_dtypes() rewrites null-like values in place.
    dict_data = dict(dict_data)

    def quote(identifier, with_args=False):
        """Backtick-quote an identifier; double '%' when the SQL is run with args."""
        quoted = '`' + str(identifier).replace('`', '``') + '`'
        return quoted.replace('%', '%%') if with_args else quoted

    # --- Step 1: make sure the database exists ---------------------------
    connection = pymysql.connect(**self.config)
    try:
        with connection.cursor() as cursor:
            cursor.execute("SHOW DATABASES LIKE %s", (db_name,))
            if not cursor.fetchone():
                # Older Aliyun / Linux MySQL builds do not support the 0900 collation.
                legacy_server = '8.138.27' in str(self.host) or platform.system() == "Linux"
                if legacy_server:
                    # NOTE(review): this stores a collation name under the
                    # 'charset' key; kept as-is, confirm the driver accepts it.
                    self.config.update({'charset': 'utf8mb4_unicode_ci'})
                if '192.168.1.100' in str(self.host):
                    sql = f"CREATE DATABASE {quote(db_name)}"
                elif legacy_server:
                    # Previously this statement was always overwritten by the
                    # branch below, defeating the legacy-server special case.
                    sql = f"CREATE DATABASE {quote(db_name)} COLLATE utf8mb4_unicode_ci"
                else:
                    sql = f"CREATE DATABASE {quote(db_name)} COLLATE utf8mb4_0900_ai_ci"
                cursor.execute(sql)
                connection.commit()
                print(f"创建Database: {db_name}")
    finally:
        connection.close()  # the first connection used to be leaked

    # --- Step 2: reconnect with the database selected --------------------
    self.config.update({'database': db_name})
    connection = pymysql.connect(**self.config)
    try:
        with connection.cursor() as cursor:
            # 2.1 Create an empty table when it is missing.
            cursor.execute("SHOW TABLES LIKE %s;", (table_name,))
            if not cursor.fetchone():
                cursor.execute(
                    f"CREATE TABLE IF NOT EXISTS {quote(table_name)} (id INT AUTO_INCREMENT PRIMARY KEY);"
                )
                print(f'创建 mysql 表: {table_name}')

            # 2.2 Derive a column type for every key, e.g. {'店铺名称': 'mediumtext', ...}
            dtypes = self.cover_dict_dtypes(dict_data=dict_data)

            # 2.3 Add the columns the table does not have yet.
            cursor.execute(
                "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;",
                (db_name, table_name),
            )
            col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
            for col in dict_data:
                if col in col_exist:
                    continue
                cursor.execute(
                    f"ALTER TABLE {quote(table_name)} ADD COLUMN {quote(col)} {dtypes[col]} NOT NULL;"
                )
                print(f"添加列: {col}({dtypes[col]})")

                if col in main_key or col in unique_main_key:
                    cursor.execute(
                        f"SHOW INDEXES FROM {quote(table_name, with_args=True)} WHERE `Column_name` = %s",
                        (col,),
                    )
                    if cursor.fetchone():
                        continue  # an index on this column already exists
                    # A prefix length is only valid (and required) on text columns.
                    prefix = f"({index_length})" if dtypes[col] == 'mediumtext' else ''
                    # One index name per column; a constant name collides on the
                    # second index. MySQL identifiers are limited to 64 chars.
                    index_name = quote(f'idx_{col}'[:64])
                    if col in main_key:
                        cursor.execute(
                            f"CREATE INDEX {index_name} ON {quote(table_name)}({quote(col)}{prefix})"
                        )
                        print(f"创建索引列: {col}({dtypes[col]})")
                    else:
                        cursor.execute(
                            f"CREATE UNIQUE INDEX {index_name} ON {quote(table_name)}({quote(col)}{prefix})"
                        )
                        print(f"创建唯一索引: {col}({dtypes[col]})")
            connection.commit()

            # 2.4 Upsert the row with driver-side escaping of every value.
            columns = list(dict_data)
            # '客户id' was historically excluded from the UPDATE list; keep that
            # when the key is present, but no longer fail when it is absent.
            update_cols = [c for c in columns if c != '客户id'] or columns
            keys_sql = ', '.join(quote(c, with_args=True) for c in columns)
            placeholders = ', '.join(['%s'] * len(columns))
            update_sql = ', '.join(
                f'{quote(c, with_args=True)} = VALUES({quote(c, with_args=True)})' for c in update_cols
            )
            sql = (
                f"INSERT INTO {quote(table_name, with_args=True)} ({keys_sql}) "
                f"VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_sql}"
            )
            # Values were always sent in their str() form; keep that contract.
            cursor.execute(sql, [str(value) for value in dict_data.values()])
            connection.commit()
    finally:
        connection.close()
207
+
208
+
209
def cover_dict_dtypes(self, dict_data):
    """Choose a MySQL column type for every key of ``dict_data``.

    Decision order per key:
      1. name looks like a product code / id / file size -> ``mediumtext``
      2. name looks like a ratio (share, ROI, YoY, ...)   -> ``decimal(10,2)``
      3. value is empty / ``nan`` / ``null``              -> ``mediumtext``
         (the value is replaced by ``0`` in ``dict_data``)
      4. value is a date / datetime                       -> ``DATE`` / ``DATETIME``
      5. value is an integer                              -> ``INT``
      6. value is a decimal                               -> ``decimal(p,s)`` sized
         from its digits, or ``mediumtext`` when it does not fit
      7. anything else                                    -> ``mediumtext``

    Args:
        dict_data: Mapping of column name -> value. Null-like values are
            rewritten to ``0`` in place (step 3).

    Returns:
        Dict of column name -> MySQL type string, or ``None`` (after printing
        a message) when ``dict_data`` is empty.
    """
    if not dict_data:
        print(f'mysql.py -> MysqlUpload -> cover_dict_dtypes -> 传入的字典不能为空')
        return

    column_types = {}
    for k, v in dict_data.items():
        # The column name alone decides the type for ids and ratios, so the
        # value classifiers below are only run when they are actually needed.
        if re.findall(r'商品编码|[商品宝贝]_?id|s[kp]u_?id|货号|款号|文件大小', str(k), re.IGNORECASE):
            column_types[k] = 'mediumtext'  # sku/spu style identifiers stay textual
            continue
        if re.findall(r'占比|投产|产出|同比|环比|roi$', str(k), re.IGNORECASE):
            column_types[k] = 'decimal(10,2)'
            continue

        text = str(v)
        if text == '' or text.lower() == 'nan' or text.lower() == 'null':
            # Columns are created NOT NULL, so null-like values are stored as 0.
            dict_data.update({k: 0})
            column_types[k] = 'mediumtext'
            continue

        date_type = is_valid_date(text)  # 1 = date, 2 = datetime
        if date_type == 1:
            column_types[k] = 'DATE'
        elif date_type == 2:
            column_types[k] = 'DATETIME'
        elif is_integer(text):
            column_types[k] = 'INT'
        else:
            count_int, count_float = count_decimal_places(text)
            if count_float <= 0:
                column_types[k] = 'mediumtext'
            elif count_int + count_float > 10:
                column_types[k] = 'mediumtext'
            elif count_float >= 6:
                column_types[k] = 'decimal(12,6)'
            elif count_float >= 4:
                column_types[k] = 'decimal(10,4)'
            elif count_int > 8:
                # decimal(10,2) holds at most 8 integer digits; 9 would overflow.
                column_types[k] = 'mediumtext'
            else:
                column_types[k] = 'decimal(10,2)'
    return column_types
246
+
247
+
248
+
71
249
  @try_except
72
250
  def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'xigua_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
73
251
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.8.9
3
+ Version: 2.9.2
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -9,10 +9,10 @@ mdbq/aggregation/query_data.py,sha256=iRgPljgOPE7dzhaaVxRXOEOOKQTmWg6sGsDplNLTvQ
9
9
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
10
10
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
11
11
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
12
- mdbq/clean/clean_upload.py,sha256=FJxoEX-2QKuFhrF1ecl_LdZ1uFnVPx4HigNcXdErB28,66561
12
+ mdbq/clean/clean_upload.py,sha256=yMAb6tV9XHhFJbRrCOeaPfszApJ9y5M4-hQGuBSXNqE,67799
13
13
  mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
14
14
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
15
- mdbq/company/copysh.py,sha256=UD5BLBe9uMfqjdslyY7-TtGXuJI5jsol-w4kIQFDfQk,21577
15
+ mdbq/company/copysh.py,sha256=eFu6focRqm2Njn_XN1KW2ZYJiTv6EYgsdBCLokobyxQ,21572
16
16
  mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
17
17
  mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
18
18
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -22,13 +22,13 @@ mdbq/config/products.py,sha256=Sj4FSb2dZcMKp6ox-FJdIR87QLgMN_TJ7Z6KAWMTWyw,6214
22
22
  mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
23
23
  mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
24
24
  mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
25
- mdbq/dataframe/converter.py,sha256=X5Aubm9Z4_bhslcu1-XZzT8X6UpoAW5BFs30RfgfRmE,4460
25
+ mdbq/dataframe/converter.py,sha256=s-5EzHU9HlcSHlKgRd3autGODaKn523av1N0gh_56YY,4614
26
26
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
27
27
  mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
28
28
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
29
29
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
30
30
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
31
- mdbq/mysql/mysql.py,sha256=apcj0WDdbrHr7UzO2kjcesDxDUlWxG4KcIpI1mBuwMk,46152
31
+ mdbq/mysql/mysql.py,sha256=jq2_lQFTnQR6N6QdSqxKqjennazITh5TdTg6j7oerYE,55006
32
32
  mdbq/mysql/recheck_mysql.py,sha256=jHQSlQy0PlQ_EYICQv_2nairUX3t6OIwPtSELKIpjkY,8702
33
33
  mdbq/mysql/s_query.py,sha256=bgNNIqYLDCHjD5KTFcm6x4u74selpAGs5ouJYuqX86k,8447
34
34
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
@@ -45,7 +45,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
45
45
  mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
46
46
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
47
47
  mdbq/spider/aikucun.py,sha256=jHrdGWBJQaSywx7V-U4YuM6vWkwC5SR5tTOOdB3YU_c,17306
48
- mdbq-2.8.9.dist-info/METADATA,sha256=2_jXKvIIyj0iXk8T4uADVdvpKKF_kxFoWlN75PLoQlo,243
49
- mdbq-2.8.9.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
50
- mdbq-2.8.9.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
51
- mdbq-2.8.9.dist-info/RECORD,,
48
+ mdbq-2.9.2.dist-info/METADATA,sha256=3_u-jNgpFq2qX6uv2ufoYgPirGomwoz9IZkYmRp1BIc,243
49
+ mdbq-2.9.2.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
50
+ mdbq-2.9.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
51
+ mdbq-2.9.2.dist-info/RECORD,,
File without changes