mdbq-1.4.1-py3-none-any.whl → mdbq-1.4.3-py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
mdbq/aggregation/mysql_types.py CHANGED
@@ -154,90 +154,91 @@ class DataTypes:
         return {}, cl, None, None  # These are returned so the json file is written only after the columns have been added; otherwise the types info cannot be read yet
 
 
-def mysql_all_dtypes(db_name=None, table_name=None, path=None, system_name=None):
+def mysql_all_dtypes(db_name=None, table_name=None, service_databases=[{'home_lx': 'mysql'}], path=None, system_name=None):
     """
     Update the dtypes info of every database on the laptop's mysql to a local json file
     """
-
-    username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
-    config = {
-        'host': host,
-        'port': port,
-        'user': username,
-        'password': password,
-        'charset': 'utf8mb4',  # utf8mb4 supports four-byte UTF-8 characters
-        'cursorclass': pymysql.cursors.DictCursor,
-    }
-
-    connection = pymysql.connect(**config)  # connect to the database
-    with connection.cursor() as cursor:
-        sql = "SHOW DATABASES;"
-        cursor.execute(sql)
-        db_name_lists = cursor.fetchall()
-        db_name_lists = [item['Database'] for item in db_name_lists]
-    connection.close()
-
-    sys_lists = ['information_schema', 'mysql', 'performance_schema', 'sakila', 'sys']
-    db_name_lists = [item for item in db_name_lists if item not in sys_lists]
-
-    # db_name_lists = [
-    #     '京东数据2',
-    #     '推广数据2',
-    #     '市场数据2',
-    #     '生意参谋2',
-    #     '生意经2',
-    #     '属性设置2',
-    #     '聚合数据',
-    # ]
-    results = []  # sample result: [{'云电影': '电影更新'}, {'生意经2': 'e3_零售明细统计'}]
-    for db_ in db_name_lists:
-        config.update({'database': db_})  # add/update the database field in config
-        connection = pymysql.connect(**config)  # connect to the database
-        try:
+    for service_database in service_databases:
+        for service_name, database in service_database.items():
+            username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+            config = {
+                'host': host,
+                'port': port,
+                'user': username,
+                'password': password,
+                'charset': 'utf8mb4',  # utf8mb4 supports four-byte UTF-8 characters
+                'cursorclass': pymysql.cursors.DictCursor,
+            }
+
+            connection = pymysql.connect(**config)  # connect to the database
             with connection.cursor() as cursor:
-                sql = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{db_}';"
-                sql = "SHOW TABLES;"
+                sql = "SHOW DATABASES;"
                 cursor.execute(sql)
-                res_tables = cursor.fetchall()
-                for res_table in res_tables:
-                    for k, v in res_table.items():
-                        results.append({db_: v})
-        except:
-            pass
-        finally:
-            connection.close()
-        time.sleep(0.5)
-
-    d = DataTypes(path=path, system_name=system_name)
-    for result in results:
-        for db_n, table_n in result.items():
-            # print(db_n, table_n, db_name, table_name)
-            if db_name and table_name:  # download one specified table
-                if db_name != db_n or table_name != table_n:
-                    continue
-            elif db_name:  # download every table of one database
-                if db_name != db_n:
-                    continue
-            # if neither db_name nor table_name is given, download every table of every database
-            print(f'获取列信息 数据库: < {db_n} >, 数据表: < {table_n} >')
-            sq = s_query.QueryDatas(username=username, password=password, host=host, port=port)
-            # fetch the specified columns of the table; returns a list
-            # [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
-            name_type = sq.dtypes_to_list(db_name=db_n, table_name=table_n)
-            if name_type:
-                dtypes = {item['COLUMN_NAME']: item['COLUMN_TYPE'] for item in name_type}
-                dtypes = {'mysql': {db_n: {table_n: dtypes}}}
-                d.get_mysql_types(
-                    dtypes=dtypes,
-                    cl='mysql',
-                    db_name=db_n,
-                    table_name=table_n,
-                    is_file_dtype=True  # True means the dtypes already in the file take precedence
-                )
-            else:
-                print(f'数据库回传数据(name_type)为空')
-    # print(d.datas)
-    d.as_json_file()
+                db_name_lists = cursor.fetchall()
+                db_name_lists = [item['Database'] for item in db_name_lists]
+            connection.close()
+
+            sys_lists = ['information_schema', 'mysql', 'performance_schema', 'sakila', 'sys']
+            db_name_lists = [item for item in db_name_lists if item not in sys_lists]
+
+            # db_name_lists = [
+            #     '京东数据2',
+            #     '推广数据2',
+            #     '市场数据2',
+            #     '生意参谋2',
+            #     '生意经2',
+            #     '属性设置2',
+            #     '聚合数据',
+            # ]
+            results = []  # sample result: [{'云电影': '电影更新'}, {'生意经2': 'e3_零售明细统计'}]
+            for db_ in db_name_lists:
+                config.update({'database': db_})  # add/update the database field in config
+                connection = pymysql.connect(**config)  # connect to the database
+                try:
+                    with connection.cursor() as cursor:
+                        sql = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{db_}';"
+                        sql = "SHOW TABLES;"
+                        cursor.execute(sql)
+                        res_tables = cursor.fetchall()
+                        for res_table in res_tables:
+                            for k, v in res_table.items():
+                                results.append({db_: v})
+                except:
+                    pass
+                finally:
+                    connection.close()
+                time.sleep(0.5)
+
+            d = DataTypes(path=path, system_name=system_name)
+            for result in results:
+                for db_n, table_n in result.items():
+                    # print(db_n, table_n, db_name, table_name)
+                    if db_name and table_name:  # download one specified table
+                        if db_name != db_n or table_name != table_n:
+                            continue
+                    elif db_name:  # download every table of one database
+                        if db_name != db_n:
+                            continue
+                    # if neither db_name nor table_name is given, download every table of every database
+                    print(f'获取列信息 数据库: < {db_n} >, 数据表: < {table_n} >')
+                    sq = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+                    # fetch the specified columns of the table; returns a list
+                    # [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
+                    name_type = sq.dtypes_to_list(db_name=db_n, table_name=table_n)
+                    if name_type:
+                        dtypes = {item['COLUMN_NAME']: item['COLUMN_TYPE'] for item in name_type}
+                        dtypes = {'mysql': {db_n: {table_n: dtypes}}}
+                        d.get_mysql_types(
+                            dtypes=dtypes,
+                            cl='mysql',
+                            db_name=db_n,
+                            table_name=table_n,
+                            is_file_dtype=True  # True means the dtypes already in the file take precedence
+                        )
+                    else:
+                        print(f'数据库回传数据(name_type)为空')
+            # print(d.datas)
+            d.as_json_file()
 
 
 if __name__ == '__main__':
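
The rewritten mysql_all_dtypes walks a service_databases list of one-key {service_name: database} dicts instead of hard-coding home_lx, so the dtypes snapshot can be rebuilt from several MySQL services in one call. A minimal runnable sketch of that iteration pattern, with select_config_values stubbed in place of mdbq's get_myconf (the stub's return values are placeholders, not real credentials):

def select_config_values(target_service, database):
    # Stand-in for mdbq.config.get_myconf.select_config_values;
    # returns (username, password, host, port) placeholders.
    return 'user', 'secret', '127.0.0.1', 3306

def iterate_services(service_databases=[{'home_lx': 'mysql'}]):
    # Each list element maps one service name to one database type,
    # mirroring the parameter added in 1.4.3.
    for service_database in service_databases:
        for service_name, database in service_database.items():
            username, password, host, port = select_config_values(
                target_service=service_name, database=database)
            print(f'{service_name}/{database} -> {host}:{port}')

iterate_services()  # default matches the old hard-coded home_lx behaviour
iterate_services(service_databases=[{'home_lx': 'mysql'}, {'company': 'mysql'}])
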
mdbq/aggregation/query_data.py CHANGED
@@ -494,7 +494,45 @@ class GroupBy:
             print(f'{filename}: sort_values排序参数错误!')
         df.to_excel(os.path.join(path, filename + '.xlsx'), index=index, header=header, engine=engine, freeze_panes=freeze_panes)
 
-
+
+def data_aggregation_one(service_databases=[{}], months=1, system_name=None,):
+    """
+    # Process a single aggregation database on its own; modify/add the data_dict values
+    """
+    for service_database in service_databases:
+        for service_name, database in service_database.items():
+            sdq = MysqlDatasQuery(target_service=service_name)  # instantiate the data-processing class
+            sdq.months = months  # set the data period; 1 means roughly the last 2 months
+            g = GroupBy()  # instantiate the aggregation class
+            # instantiate the database connection
+            username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+            m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+
+            # fetch data from the database; returns dicts containing the df data
+            # to process a single aggregation database on its own, modify/add the data_dict values here
+            data_dict = [
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '天猫_店铺来源_日数据_旧版',
+                    '唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
+                    '数据主体': sdq.dplyd(),
+                },
+            ]
+            for items in data_dict:  # iterate over the results
+                db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
+                df = g.groupby(df=df, table_name=table_name, is_maximize=True)  # 2. aggregate the data
+                # g.as_csv(df=df, filename=table_name + '.csv')  # export csv
+                m.df_to_mysql(
+                    df=df,
+                    db_name=db_name,
+                    table_name=table_name,
+                    drop_dup=False,
+                    icm_update=unique_key_list,
+                    system_name=system_name,
+                    service_databases=service_databases,
+                )  # 3. write back to the database
+
+
 def data_aggregation(service_databases=[{}], months=1, system_name=None,):
     """
     1. Read data from the database
@@ -561,6 +599,7 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
                     drop_dup=False,
                     icm_update=unique_key_list,
                     system_name=system_name,
+                    service_databases=service_databases,
                 )  # 3. write back to the database
             res = g.performance(bb_tg=True)  # profit-and-loss table; depends on other tables, built separately
             m.df_to_mysql(
@@ -570,6 +609,7 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
                 drop_dup=False,
                 icm_update=['日期', '商品id'],  # set the unique primary key
                 system_name = system_name,
+                service_databases=service_databases,
             )
             res = g.performance(bb_tg=False)  # profit-and-loss table; depends on other tables, built separately
             m.df_to_mysql(
@@ -579,6 +619,7 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
                 drop_dup=False,
                 icm_update=['日期', '商品id'],  # set the unique primary key
                 system_name=system_name,
+                service_databases=service_databases,
            )
 
    # This must stay commented out, otherwise copysh.py may break; this part mainly edits the config file and then triggers the optimize_datas.py (with an "s") program on home_lx to do a global cleanup
@@ -586,5 +627,6 @@ def data_aggregation(service_databases=[{}], months=1, system_name=None,):
 
 
 if __name__ == '__main__':
-    data_aggregation(service_databases=[{'company': 'mysql'}], months=1, system_name='company')
+    # data_aggregation(service_databases=[{'company': 'mysql'}], months=1, system_name='company')
+    data_aggregation_one(service_databases=[{'company': 'mysql'}], months=10, system_name='company')
     # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # start cleaning the aggregated data right away
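
The new data_aggregation_one mirrors the loop body of data_aggregation but pins data_dict to a single entry, so one aggregation table can be rebuilt in isolation; per its docstring, you edit the data_dict literal to choose the table. A self-contained sketch of the dispatch pattern it uses (the query is stubbed with a fake DataFrame and the upload is replaced by a print; only the dict keys come from the code above):

import pandas as pd

def fake_query():
    # Hypothetical stand-in for sdq.dplyd(); shaped like a store-traffic query result.
    return pd.DataFrame({'日期': ['2024-01-01'], '一级来源': ['a'],
                         '二级来源': ['b'], '三级来源': ['c'], '访客数': [1]})

data_dict = [
    {
        '数据库名': '聚合数据',
        '集合名': '天猫_店铺来源_日数据_旧版',
        '唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
        '数据主体': fake_query(),
    },
]
for items in data_dict:
    # Unpack one work item: target db, target table, upsert keys, payload.
    db_name, table_name, unique_key_list, df = (
        items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体'])
    print(db_name, table_name, unique_key_list, len(df))  # upload stubbed out
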
mdbq/company/copysh.py CHANGED
@@ -289,7 +289,7 @@ class UpdateMysql:
         return False, self.d_path
 
 
-def op_data(days: int =3650):
+def op_data(days: int =100):
 
     # Clean the databases, except for 聚合数据
     if socket.gethostname() == 'company':  # the company desktop runs this itself
@@ -322,11 +322,11 @@ def op_data(days: int =3650):
             '属性设置2',
             # '聚合数据',  # do not clean the aggregated data here; aggregation has not even started yet
         ],
-        days=3650,
+        days=days,
    )
 
    # Data aggregation
-    query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1, system_name='home_lx')
+    query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=3, system_name='home_lx')
    time.sleep(60)
 
    # Clean the aggregated data
@@ -352,7 +352,7 @@ def main():
     w = update_conf.UpdateConf()
     w.update_config(filename='.copysh_conf', option='ch_record', new_value='False')
     time.sleep(60)
-    op_data(days=3650)  # data cleanup and aggregation
+    op_data(days=100)  # data cleanup and aggregation
 
     t.sleep_minutes = 30  # sleep time before syncing
     t.tb_file()
mdbq/mysql/mysql.py CHANGED
@@ -57,7 +57,7 @@ class MysqlUpload:
         }
         self.filename = None
 
-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=False, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], service_databases=[{'home_lx': 'mysql'}], df_sql=False, drop_dup=False, drop_duplicates=False, filename=None, count=None, json_path=None, system_name=None):
         """
         Write the df into the database
         db_name: name of the database
@@ -167,11 +167,11 @@ class MysqlUpload:
             # These are returned so the json file is written only after the columns have been added; otherwise the types info cannot be read yet
             # ⚠️ by default, mysql_all_dtypes only reads the database info of home_lx, not of other systems
             if cl and db_n and tb_n:
-                mysql_types.mysql_all_dtypes(db_name=db_name, table_name=table_name, system_name=system_name)  # update the dtypes of one table
+                mysql_types.mysql_all_dtypes(service_databases=service_databases, db_name=db_name, table_name=table_name, system_name=system_name)  # update the dtypes of one table
             elif cl and db_n:
-                mysql_types.mysql_all_dtypes(db_name=db_name, system_name=system_name)  # update the dtypes of one database
+                mysql_types.mysql_all_dtypes(service_databases=service_databases, db_name=db_name, system_name=system_name)  # update the dtypes of one database
             elif cl:
-                mysql_types.mysql_all_dtypes(system_name=system_name)  # update the dtypes info of every table in every database to the local json
+                mysql_types.mysql_all_dtypes(service_databases=service_databases, system_name=system_name)  # update the dtypes info of every table in every database to the local json
 
             # 4. Remove data within the specified date range; for aggregated data only, do not set it in other cases
             if drop_duplicates and '日期' in df.columns.tolist():
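
The net effect of the mysql.py hunks: df_to_mysql now accepts service_databases and threads it into every mysql_types.mysql_all_dtypes call, so the dtypes refresh is no longer pinned to home_lx (the limitation flagged by the ⚠️ comment). A hedged caller-side sketch of the new keyword; the credentials, table name, and DataFrame are placeholders, and it only runs with mdbq >= 1.4.3 installed and a reachable MySQL server:

import pandas as pd
from mdbq.mysql import mysql  # assumes the mdbq package is installed

# Placeholder payload shaped like the aggregation results above.
df = pd.DataFrame({'日期': ['2024-01-01'], '商品id': [101], '销售额': [88.0]})
m = mysql.MysqlUpload(username='user', password='secret', host='127.0.0.1', port=3306)
m.df_to_mysql(
    df=df,
    db_name='聚合数据',
    table_name='测试表',            # placeholder table name
    drop_dup=False,
    icm_update=['日期', '商品id'],  # unique primary key, as in data_aggregation
    system_name='company',
    service_databases=[{'company': 'mysql'}],  # new in 1.4.3; defaults to [{'home_lx': 'mysql'}]
)
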
mdbq-1.4.1.dist-info/METADATA → mdbq-1.4.3.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 1.4.1
+Version: 1.4.3
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
mdbq-1.4.1.dist-info/RECORD → mdbq-1.4.3.dist-info/RECORD
@@ -3,15 +3,15 @@ mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
 mdbq/aggregation/aggregation.py,sha256=cEKhmlqvrChXlxyxEq5qNMAgL_3hp1xtt9heOxfu9Z4,58113
 mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
-mdbq/aggregation/mysql_types.py,sha256=jTQaCrDy9hj36GIImshXANyQNFAvVKJTybkzJSh-qJ8,10671
+mdbq/aggregation/mysql_types.py,sha256=3j_SUTi7qYJyY3JtV_lRLIyczIczmRPE_WaIrXiZ6Rw,11425
 mdbq/aggregation/optimize_data.py,sha256=MnGEO4csKYHJ8MzzfsOf-r7yQbjO4ugthwdI-g6PRr4,4144
-mdbq/aggregation/query_data.py,sha256=qSCN3Y20Ken4cmwG63kMnvz9jg115PB9qvo-hF9Aq34,27504
+mdbq/aggregation/query_data.py,sha256=2SFeTkgsNgQVjNVDi1K2zTsNSw9dwL19Xj7Peu7sfRg,29828
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
 mdbq/clean/data_clean.py,sha256=TnG0t6Ted9miMoBdGanpTmj6AUa0YFSyEBvmgN3V7sw,87558
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
-mdbq/company/copysh.py,sha256=NJWEnxOZhbnqbHPQfCxBNAcl-LLLEk4Gn9QP35XlA0c,17041
+mdbq/company/copysh.py,sha256=XBnSalNHJPCBkswyiDpHhhVPqxRMOGcbAncWytrhCyQ,17039
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/config/get_myconf.py,sha256=bp6bVARZVm3ANj1pmM9hLB8Ao539TUWeM9xxeSsBpzw,5994
 mdbq/config/products.py,sha256=9gqXJMsw8KKuD4Xs6krNgcF7AuWDvV7clI6wVi3QjcA,4260
@@ -24,7 +24,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=kkkbFJHox0diPpDadwHicRQcDUxdH1h8MsMF1MNNBRI,44344
+mdbq/mysql/mysql.py,sha256=PXgE5mjLzzFDzor7DmcudEYz0pwHZRiiFomdkBmGj7U,44497
 mdbq/mysql/s_query.py,sha256=a33aYhW6gAnspIZfQ7l23ePln9-MD1f_ukypr5M0jd8,8018
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-1.4.1.dist-info/METADATA,sha256=6CQIPEW8CigYNwESE70LBIjNpEAGVSQ1wR1HRkHA8Kk,245
-mdbq-1.4.1.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
-mdbq-1.4.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-1.4.1.dist-info/RECORD,,
+mdbq-1.4.3.dist-info/METADATA,sha256=O4RKaUpul7RzqaucQbThigiFnKGCDN9K8TVsiZKZvng,245
+mdbq-1.4.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-1.4.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-1.4.3.dist-info/RECORD,,
mdbq-1.4.1.dist-info/WHEEL → mdbq-1.4.3.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (70.1.0)
+Generator: bdist_wheel (0.44.0)
 Root-Is-Purelib: true
 Tag: py3-none-any