mdbq 3.8.8__py3-none-any.whl → 3.8.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '3.8.8'
+ VERSION = '3.8.10'
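
Note: a quick way to confirm which release is installed after upgrading is to read the version string this file defines (a minimal sketch; assumes the package is importable):

    from mdbq.__version__ import VERSION
    print(VERSION)  # '3.8.10' for this release
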
mdbq/mysql/mysql.py CHANGED
@@ -1,6 +1,5 @@
  # -*- coding:utf-8 -*-
  import datetime
- import platform
  import re
  import time
  from functools import wraps
@@ -10,8 +9,8 @@ import numpy as np
  import pandas as pd
  from sqlalchemy import create_engine
  import os
- import calendar
  import logging
+ from mdbq.other import otk

  warnings.filterwarnings('ignore')
  """
@@ -21,44 +20,6 @@ warnings.filterwarnings('ignore')
  logger = logging.getLogger(__name__)


- def is_valid_date(date_string):
-     """
-     判断是否是日期格式, 且允许没有前导零, 且允许带时间
-     纯日期格式: 返回 1
-     日期+时间: 返回 2
-     """
-     date_pattern = r"^(\d{4})-(0?[1-9]|1[0-2])-(0?[1-9]|[12]\d|3[01])$"
-     match = re.match(date_pattern, str(date_string)) # 判断纯日期格式:2024-11-09
-     if match is None:
-         date_pattern = r".*\d+:\d+:\d+$"
-         match = re.match(date_pattern, date_string) # 判断日期+时间:2024-11-09 00:36:45
-         if match is not None:
-             return 2
-     else:
-         return 1
-
-
- def is_integer(int_str):
-     """ 判断是否整数, 允许包含千分位分隔符, 允许科学计数法 """
-     # 如果是科学计数法
-     match = re.findall(r'^[-+]?(\d+)\.(\d+)[eE][-+]?(\d+)$', str(int_str))
-     if match:
-         if len(match[0]) == 3:
-             if int(match[0][0]) == 0: # 0 开头
-                 if int(match[0][2]) > 10: # 转换后整数长度超过 10 位
-                     return False
-             else: # 不是 0 开头
-                 if len(match[0][0]) + int(match[0][2]) > 10: # 转换后整数长度超过 10 位
-                     return False
-         if int(match[0][2]) >= len(match[0][1]):
-             return True
-         else:
-             return False
-     # 如果是普通数字, 且允许千分符
-     __pattern = r'^[-+]?\d{1,3}(,\d{3}){0,3}$|^[-+]?\d{1,9}$'
-     return re.match(__pattern, str(int_str)) is not None
-
-
  def count_decimal_places(num_str):
      """ 计算小数位数, 允许科学计数法 """
      match = re.match(r'^[-+]?\d+(\.\d+)?([eE][-+]?\d+)?$', str(num_str))
@@ -144,8 +105,8 @@ class MysqlUpload:
  result3 = re.findall(r'同比$|环比$', k, re.IGNORECASE)
  result4 = re.findall(r'花费$|消耗$|金额$', k, re.IGNORECASE)

- date_type = is_valid_date(v) # 判断日期时间
- int_num = is_integer(v) # 判断整数
+ date_type = otk.is_valid_date(v) # 判断日期时间
+ int_num = otk.is_integer(v) # 判断整数
  count_int, count_float = count_decimal_places(v) # 判断小数,返回小数位数
  if result1: # 京东sku/spu商品信息
      __res_dict.update({k: 'varchar(100)'})
@@ -204,13 +165,7 @@ class MysqlUpload:
  database_exists = cursor.fetchone()
  if not database_exists:
      # 如果数据库不存在,则新建
-     if '8.138.27' in str(self.host) or platform.system() == "Linux": # 阿里云 mysql 低版本不支持 0900
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
-         self.config.update({'charset': 'utf8mb4_unicode_ci'})
-     if '192.168.1.100' in str(self.host):
-         sql = f"CREATE DATABASE `{db_name}`"
-     else:
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
+     sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
      cursor.execute(sql)
      connection.commit()
      logger.info(f"创建Database: {db_name}")
@@ -369,13 +324,8 @@ class MysqlUpload:
  database_exists = cursor.fetchone()
  if not database_exists:
      # 如果数据库不存在,则新建
-     if '8.138.27' in str(self.host) or platform.system() == "Linux": # 阿里云 mysql 低版本不支持 0900
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
-         self.config.update({'charset': 'utf8mb4_unicode_ci'})
-     if '192.168.1.100' in str(self.host):
-         sql = f"CREATE DATABASE `{db_name}`"
-     else:
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
+
+     sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
      cursor.execute(sql)
      connection.commit()
      logger.info(f"创建Database: {db_name}")
@@ -580,13 +530,7 @@ class MysqlUpload:
  database_exists = cursor.fetchone()
  if not database_exists:
      # 如果数据库不存在,则新建
-     if '8.138.27' in str(self.host) or platform.system() == "Linux": # 阿里云 mysql 低版本不支持 0900
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
-         self.config.update({'charset': 'utf8mb4_unicode_ci'})
-     if '192.168.1.100' in str(self.host):
-         sql = f"CREATE DATABASE `{db_name}`"
-     else:
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
+     sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
      cursor.execute(sql)
      connection.commit()
      logger.info(f"创建Database: {db_name}")
@@ -770,8 +714,8 @@ class MysqlUpload:
  result3 = re.findall(r'同比$|环比$', k, re.IGNORECASE)
  result4 = re.findall(r'花费$|消耗$|金额$', k, re.IGNORECASE)

- date_type = is_valid_date(v) # 判断日期时间
- int_num = is_integer(v) # 判断整数
+ date_type = otk.is_valid_date(v) # 判断日期时间
+ int_num = otk.is_integer(v) # 判断整数
  count_int, count_float = count_decimal_places(v) # 判断小数,返回小数位数
  if result1: # 京东sku/spu商品信息
      __res_dict.update({k: 'varchar(100)'})
@@ -806,44 +750,9 @@ class MysqlUpload:
          new_dict_data.update({k: v})
      return __res_dict, new_dict_data

- def cover_df(self, df):
-     """ 清理 df 的值和列名 """
-     df.replace([np.inf, -np.inf], '0', inplace=True) # 清理一些非法值
-     # df.replace(to_replace=['\\N', '-', '--', '', 'nan', 'NAN'], value='0', regex=False, inplace=True) # 替换掉特殊字符
-     df.replace(to_replace=['\\N', '', 'nan', 'NAN'], value='0', regex=False, inplace=True) # 替换掉特殊字符
-     # df.replace(to_replace=[','], value='', regex=True, inplace=True)
-     df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
-     df.replace(to_replace=['"'], value='', regex=True, inplace=True)
-     cols = df.columns.tolist()
-     for col in cols:
-         if col == 'id':
-             df.pop('id')
-             continue
-         df[col] = df[col].apply(lambda x: float(re.sub(r'%$', '', str(x))) / 100 if (
-                 str(x) != '' and str(x).endswith('%')) and not re.findall('[\\u4e00-\\u9fa5]', str(x)) else '0.0' if str(x) == '0%' else x)
-         try:
-             # 不能直接使用 int() ,对于大数,可能转为uint64,导致数据库入库可能异常
-             df[col] = df[col].apply(
-                 lambda x: np.int64(str(x)) if '_' not in str(x) and '.' not in str(x) else x) # 不含小数点尝试转整数
-         except:
-             pass
-         try:
-             if df[col].dtype == 'object': # 有些列没有被 pandas 识别数据类型,会没有 dtype 属性
-                 df[col] = df[col].apply(lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
-         except:
-             pass
-         new_col = col.lower()
-         new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
-         new_col = new_col.replace(')', '')
-         new_col = re.sub(r'_{2,}', '_', new_col)
-         new_col = re.sub(r'_+$', '', new_col)
-         df.rename(columns={col: new_col}, inplace=True)
-     df.fillna(0, inplace=True)
-     return df
-
  def convert_df_dtypes(self, df: pd.DataFrame):
      """ 清理 df 的值和列名,并转换数据类型 """
-     df = self.cover_df(df=df) # 清理 df 的值和列名
+     df = otk.cover_df(df=df) # 清理 df 的值和列名
      [pd.to_numeric(df[col], errors='ignore') for col in df.columns.tolist()]
      dtypes = df.dtypes.to_dict()
      __res_dict = {}
@@ -949,13 +858,7 @@ class MysqlUpload:
  database_exists = cursor.fetchone()
  if not database_exists:
      # 如果数据库不存在,则新建
-     # if '8.138.27' in str(self.host) or platform.system() == "Linux": # 阿里云 mysql 低版本不支持 0900
-     #     sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
-     #     self.config.update({'charset': 'utf8mb4_unicode_ci'})
-     if '192.168.1.100' in str(self.host):
-         sql = f"CREATE DATABASE `{db_name}`"
-     else:
-         sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
+     sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
      cursor.execute(sql)
      connection.commit()
      logger.info(f"创建Database: {db_name}")
@@ -1705,31 +1608,5 @@ class OptimizeDatas:
        self.connection.close()


- def year_month_day_bak(start_date, end_date):
-     """
-     使用date_range函数和DataFrame来获取从start_date至end_date之间的所有年月日
-     calendar.monthrange: 获取当月第一个工作日的星期值(0,6) 以及当月天数
-     """
-     # 替换年月日中的日, 以便即使传入当月日期也有返回值
-     try:
-         start_date = f'{pd.to_datetime(start_date).year}-{pd.to_datetime(start_date).month}-01'
-     except Exception as e:
-         logger.error(e)
-         return []
-     # 使用pandas的date_range创建一个日期范围,频率为'MS'代表每月开始
-     date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
-     # 转换格式
-     year_months = date_range.strftime('%Y-%m').drop_duplicates().sort_values()
-
-     results = []
-     for year_month in year_months:
-         year = re.findall(r'(\d{4})', year_month)[0]
-         month = re.findall(r'\d{4}-(\d{2})', year_month)[0]
-         s, d = calendar.monthrange(int(year), int(month))
-         results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{d}'})
-
-     return results # start_date至end_date之间的所有年月日
-
-
  if __name__ == '__main__':
      pass
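
Note: the helpers is_valid_date and is_integer removed from mysql.py above now live in the new mdbq.other.otk module (added later in this diff) and are called as otk.is_valid_date / otk.is_integer. A minimal usage sketch with illustrative values (not taken from the package):

    from mdbq.other import otk

    # Per the implementations shown above: 1 = plain date, 2 = date + time, no return value otherwise.
    otk.is_valid_date('2024-11-9')            # 1
    otk.is_valid_date('2024-11-09 00:36:45')  # 2

    # Integers may include thousands separators or scientific notation.
    otk.is_integer('12,345')  # True
    otk.is_integer('3.14')    # False
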
mdbq/mysql/s_query.py CHANGED
@@ -1,16 +1,12 @@
  # -*- coding:utf-8 -*-
  import datetime
- import platform
  import re
  import time
- from functools import wraps
  import warnings
  import pymysql
  import numpy as np
  import pandas as pd
- from sqlalchemy import create_engine
  import os
- import calendar
  from decimal import Decimal
  import logging

mdbq/other/otk.py ADDED
@@ -0,0 +1,151 @@
+ import re
+ import calendar
+ import datetime
+ from dateutil.relativedelta import relativedelta
+ import pandas as pd
+ import numpy as np
+
+
+ def first_and_last_day(date):
+     """
+     返回指定日期当月的第一天和最后一天
+     """
+     date = pd.to_datetime(date) # n 月以前的今天
+     _, _lastDay = calendar.monthrange(date.year, date.month) # 返回月的第一天的星期和当月总天数
+     _firstDay = datetime.date(date.year, date.month, day=1)
+     _lastDay = datetime.date(date.year, date.month, day=_lastDay)
+     return _firstDay, _lastDay
+
+
+ def get_day_of_month(num: int, fm=None):
+     """
+     num: 获取n月以前的第一天和最后一天, num=0时, 返回当月第一天和最后一天
+     fm: 日期输出格式
+     """
+     if not fm:
+         fm ='%Y%m%d'
+     _today = datetime.date.today()
+     months_ago = _today - relativedelta(months=num) # n 月以前的今天
+     _, _lastDay = calendar.monthrange(months_ago.year, months_ago.month) # 返回月的第一天的星期和当月总天数
+     _firstDay = datetime.date(months_ago.year, months_ago.month, day=1).strftime(fm)
+     _lastDay = datetime.date(months_ago.year, months_ago.month, day=_lastDay).strftime(fm)
+     return _firstDay, _lastDay
+
+
+ def dates_between(start_date, end_date, fm=None) -> list:
+     """
+     获取两个日期之间的所有日期, 返回 list
+     fm: 日期输出格式
+     """
+     if not fm:
+         fm ='%Y-%m-%d'
+     start_date = pd.to_datetime(start_date)
+     end_date = pd.to_datetime(end_date)
+     dates = []
+     current_date = start_date
+     while current_date <= end_date:
+         dates.append(current_date.strftime(fm))
+         current_date += datetime.timedelta(days=1)
+     return dates
+
+
+ def cover_df(df):
+     df.replace([np.inf, -np.inf], '0', inplace=True) # 清理一些非法值
+     df.replace(to_replace=['\\N', '-', '--', '', 'nan', 'NAN'], value='0', regex=False, inplace=True) # 替换掉特殊字符
+     df.replace(to_replace=[','], value='', regex=True, inplace=True)
+     df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
+     df.replace(to_replace=['"'], value='', regex=True, inplace=True)
+     cols = df.columns.tolist()
+     for col in cols:
+         if col == 'id':
+             df.pop('id')
+             continue
+         # df[col] = df[col].apply(
+         #     lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%$', str(x)) else x)
+         # df[col] = df[col].apply(lambda x:
+         #     float(re.sub(r'%$', '', str(x))) / 100
+         #     if (str(x) != '' and str(x).endswith('%')) and not re.findall(
+         #         '[\\u4e00-\\u9fa5]', str(x)) else '0.0' if str(x) == '0%' else x)
+         df[col] = df[col].apply(
+             lambda x: float(str(x).rstrip("%")) / 100
+             if (
+                 re.fullmatch(r'^\d+\.?\d*%$', str(x)) # 匹配数字加%格式
+                 and not re.search(r'[\u4e00-\u9fa5]', str(x)) # 排除含中文的情况
+             )
+             else (
+                 '0.0' if str(x) == '0%' else x # 处理 "0%"
+             )
+         )
+
+         try:
+             # 不能直接使用 int() ,对于大数,可能转为uint64,导致数据库入库可能异常
+             df[col] = df[col].apply(
+                 lambda x: np.int64(str(x)) if '_' not in str(x) and '.' not in str(x) else x) # 不含小数点尝试转整数
+         except:
+             pass
+         try:
+             if df[col].dtype == 'object': # 有些列没有被 pandas 识别数据类型,会没有 dtype 属性
+                 df[col] = df[col].apply(lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
+         except:
+             pass
+         new_col = col.lower()
+         new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
+         new_col = new_col.replace(')', '')
+         new_col = re.sub(r'_{2,}', '_', new_col)
+         new_col = re.sub(r'_+$', '', new_col)
+         df.rename(columns={col: new_col}, inplace=True)
+     df.fillna(0, inplace=True)
+     return df
+
+
+ def translate_keys(original_dict:dict, translation_dict:dict) -> dict:
+     """
+     original_dict键名翻译, 若键存在则返回翻译值,否则返回原键
+     """
+     return {translation_dict.get(k, k): v for k, v in original_dict.items()}
+
+
+ def is_valid_date(date_string):
+     """
+     mysql调用
+     判断是否是日期格式, 且允许没有前导零, 且允许带时间
+     纯日期格式: 返回 1
+     日期+时间: 返回 2
+     """
+     date_pattern = r"^(\d{4})-(0?[1-9]|1[0-2])-(0?[1-9]|[12]\d|3[01])$"
+     match = re.match(date_pattern, str(date_string)) # 判断纯日期格式:2024-11-09
+     if match is None:
+         date_pattern = r".*\d+:\d+:\d+$"
+         match = re.match(date_pattern, date_string) # 判断日期+时间:2024-11-09 00:36:45
+         if match is not None:
+             return 2
+     else:
+         return 1
+
+
+ def is_integer(int_str):
+     """
+     mysql调用
+     判断是否整数, 允许包含千分位分隔符, 允许科学计数法
+     """
+     # 如果是科学计数法
+     match = re.findall(r'^[-+]?(\d+)\.(\d+)[eE][-+]?(\d+)$', str(int_str))
+     if match:
+         if len(match[0]) == 3:
+             if int(match[0][0]) == 0: # 0 开头
+                 if int(match[0][2]) > 10: # 转换后整数长度超过 10 位
+                     return False
+             else: # 不是 0 开头
+                 if len(match[0][0]) + int(match[0][2]) > 10: # 转换后整数长度超过 10 位
+                     return False
+         if int(match[0][2]) >= len(match[0][1]):
+             return True
+         else:
+             return False
+     # 如果是普通数字, 且允许千分符
+     __pattern = r'^[-+]?\d{1,3}(,\d{3}){0,3}$|^[-+]?\d{1,9}$'
+     return re.match(__pattern, str(int_str)) is not None
+
+
+ if __name__ == '__main__':
+     pass
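
Note: a short usage sketch of the other helpers introduced in mdbq/other/otk.py (values are illustrative; assumes pandas and python-dateutil are installed, as the module's imports require):

    import pandas as pd
    from mdbq.other import otk

    # All dates between two endpoints, inclusive; '%Y-%m-%d' by default.
    otk.dates_between('2025-01-30', '2025-02-02')
    # ['2025-01-30', '2025-01-31', '2025-02-01', '2025-02-02']

    # First and last day of the month num months ago (num=0 is the current month), '%Y%m%d' strings by default.
    first_day, last_day = otk.get_day_of_month(0)

    # Clean cell values and normalize column names; the cleaned frame is returned.
    df = pd.DataFrame({'日期': ['2025-01-01', '2025-01-02'], '花费(元)': ['1,234', '56%']})
    df = otk.cover_df(df)
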
mdbq/spider/aikucun.py CHANGED
@@ -21,6 +21,7 @@ from mdbq.mysql import mysql
  from mdbq.mysql import s_query
  from mdbq.config import config
  from mdbq.other import ua_sj
+ from mdbq.other import otk

  dir_path = os.path.expanduser("~")
  config_file = os.path.join(dir_path, 'spd.txt')
@@ -47,18 +48,6 @@ def keep_connect(_db_name, _config, max_try: int=10):
      return None


- def dates_between(start_date, end_date) -> list:
-     """ 获取两个日期之间的所有日期, 返回 list """
-     start_date = pd.to_datetime(start_date)
-     end_date = pd.to_datetime(end_date)
-     dates = []
-     current_date = start_date
-     while current_date <= end_date:
-         dates.append(current_date.strftime('%Y-%m-%d'))
-         current_date += datetime.timedelta(days=1)
-     return dates
-
-
  class AikuCun:
      def __init__(self):
          self.url = 'https://gray-merc.aikucun.com/index.html'
@@ -204,7 +193,7 @@ class AikuCun:
  self.start_date = start_date
  if end_date:
      self.end_date = end_date
- date_list = dates_between(start_date=self.start_date, end_date=self.end_date)
+ date_list = otk.dates_between(start_date=self.start_date, end_date=self.end_date)

  df = download.data_to_df(
      db_name=self.db_name,
mdbq-3.8.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 3.8.8
+ Version: 3.8.10
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-3.8.10.dist-info/RECORD CHANGED
@@ -1,25 +1,17 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=wmmK6wPunxlLTIhSLvjwGzadK_0XfwOYNMA_w0QkRF4,17
+ mdbq/__version__.py,sha256=30BOEMWMMdvugdYm1n90xiBvxiQzusLf7XtVO4-Zjr8,18
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
  mdbq/aggregation/query_data.py,sha256=-4HWC1HZmgqUAuvcRiHZU4FLtI70nRq_Hp3eXVZTyH8,185843
- mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
- mdbq/bdup/bdup.py,sha256=hJs815hGFwm_X5bP2i9XugG2w2ZY_F0n3-Q0hVpIPPw,4892
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
- mdbq/config/set_support.py,sha256=7C7NFy7Em_uC7lig54qQlIlKG_AJeMCskxzK87anGkM,462
- mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
- mdbq/dataframe/converter.py,sha256=lETYhT7KXlWzWwqguqhk6vI6kj4rnOBEW1lhqKy2Abc,5035
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
- mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
  mdbq/log/spider_logging.py,sha256=KX9TTUn9naZNBACCEFhyTktnWhr5JaSNQLppLGyrm9Y,1645
- mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/mysql.py,sha256=tR6l4Zzn9j6zKaFcy0Ktw2oL8OoX3QB6jDoDp1l2fiM,95474
- mdbq/mysql/s_query.py,sha256=09Dp7DrVXui6dAI6zFDfrsUOdjPblF_oYUpgqbZMhXg,8757
- mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
+ mdbq/mysql/mysql.py,sha256=2cPuqX4zq2b6ghFWxTylr52DPZGE2WNrCdFV0RcF6LY,89048
+ mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
- mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
+ mdbq/other/otk.py,sha256=amIFeLDNUJpSi0U6hXbnqXeGTbYL-8-5U5yAATzSM3Y,5947
  mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
  mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
  mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
@@ -28,8 +20,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq/spider/aikucun.py,sha256=bUjjPjNoW3EL6H89nnBdFEwnWgGuEB2CENuBxcvx0Kw,20284
- mdbq-3.8.8.dist-info/METADATA,sha256=gNb3isnfJokgROYlgUClnTfmR7AT-GfhxCkCyfLasrI,363
- mdbq-3.8.8.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-3.8.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-3.8.8.dist-info/RECORD,,
+ mdbq/spider/aikucun.py,sha256=YLRTDgOKPGDyNB-z5dPOJhBoTzM6Rmbjy1Qng_KyJQc,19906
+ mdbq-3.8.10.dist-info/METADATA,sha256=bYHqcx9saoyDq-BQSXUtUQWTcDNICdZeF5jaKP6Dlyc,364
+ mdbq-3.8.10.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ mdbq-3.8.10.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-3.8.10.dist-info/RECORD,,
mdbq/bdup/__init__.py DELETED
@@ -1,5 +0,0 @@
-
-
-
- # 百度云数据处理
-
mdbq/bdup/bdup.py DELETED
@@ -1,111 +0,0 @@
- # -*- coding: UTF-8 –*-
- import os
- import platform
- import subprocess
- from concurrent.futures import ThreadPoolExecutor
- from bypy import ByPy
-
-
- class BaiDu:
-     """
-     如果通过调用命令行终端运行, 云端路径必须使用linux格式,不要使用windows格式,否则在windows系统里面会上传失败(无法在云端创建文件)
-     """
-     def __init__(self):
-         self.local_path = None
-         self.remote_path = None
-         self.skip:list = []
-         self.delete_remote_files:list = []
-         self.bp = ByPy()
-         self.count = 0
-         self.total = 0
-
-     def upload_dir(self, local_path, remote_path):
-         """
-         上传整个文件夹,执行完后删除指定文件, 指定 self.delete_remote_files
-         如果通过调用命令行终端运行, 《云端路径!!》必须使用linux格式,不要使用反斜杆,否则在windows系统里面会上传失败
-         """
-         self.local_path = local_path
-         self.remote_path = remote_path.replace('\\', '/')
-         if not os.path.exists(self.local_path):
-             print(f'{self.local_path}: 本地目录不存在,没有什么可传的')
-             return
-
-         if platform.system() == 'Windows':
-             self.bp.upload(localpath=self.local_path, remotepath=self.remote_path.replace('\\', '/')) # 上传文件到百度云
-         else:
-             command = f'bypy upload "{self.local_path}" "{self.remote_path}" --on-dup skip' # 相同文件跳过
-             try:
-                 subprocess.run(command, shell=True)
-             except Exception as e:
-                 print(e)
-         self.delete_files() # 最好是在内部执行删除, 避免路径异常
-
-     def upload_file(self, local_path, remote_path, processes=False):
-         """
-         上传文件夹,按单个文件上传,可以跳过指定文件/文件夹, 指定 self.skip
-         《云端路径!!》必须使用linux格式
-         """
-         if not isinstance(self.skip, list):
-             raise TypeError('skip must be a list')
-         self.skip += ['.DS_Store', '.localized', 'desktop.ini', '$RECYCLE.BIN', 'Icon']
-         self.local_path = local_path
-         self.remote_path = remote_path.replace('\\', '/')
-         if not os.path.exists(self.local_path):
-             print(f'{self.local_path}: 本地目录不存在,没有什么可传的')
-             return
-
-         local_files = os.listdir(self.local_path)
-
-         local_file_list = []
-         for file in local_files:
-             if file in self.skip: # 跳过指定文件/文件夹
-                 continue
-             local_p = os.path.join(self.local_path, file)
-             if os.path.isfile(local_p):
-                 rt_path = os.path.join(self.remote_path, file).replace('\\', '/')
-                 self.total += 1
-                 local_file_list.append({local_p: rt_path})
-             elif os.path.isdir(local_p):
-                 for root, dirs, files in os.walk(local_p, topdown=False):
-                     for name in files:
-                         if name in self.skip: # 从子文件夹内跳过指定文件
-                             continue
-                         lc_path = os.path.join(root, name)
-                         rt_path = lc_path.replace(self.local_path, self.remote_path).replace('\\', '/')
-                         self.total += 1
-                         local_file_list.append({lc_path: rt_path})
-         if processes:
-             # 不指定 max_workers 参数,默认值是 os.cpu_count() * 5
-             with ThreadPoolExecutor() as executor:
-                 executor.map(self.up_one_file, local_file_list)
-         else:
-             for item in local_file_list:
-                 self.up_one_file(file_dict=item)
-
-     def up_one_file(self, file_dict:dict):
-         if not isinstance(file_dict, dict):
-             raise TypeError('file_dict must be a dict')
-         for k, v in file_dict.items():
-             self.count += 1
-             print(f'上传: {self.count}/{self.total} {k}')
-             self.bp.upload(localpath=k, remotepath=v) # 上传文件到百度云
-
-     def delete_files(self):
-         """ 移除云端文件,位于 self.remote_path 文件夹下的子文件 """
-         self.delete_remote_files += ['.DS_Store', '.localized', 'desktop.ini', '$RECYCLE.BIN', 'Icon']
-         for delete_file in self.delete_remote_files:
-             self.bp.remove(remotepath=f'{self.remote_path.replace('\\', '/')}/{delete_file}') # 移除文件
-
-     def download_dir(self, local_path, remote_path):
-         """ 下载文件夹到本地 """
-         self.local_path = local_path
-         self.remote_path = remote_path.replace('\\', '/')
-         if not os.path.exists(self.local_path):
-             os.mkdir(self.local_path)
-
-         self.bp.download(localpath=f'{self.local_path}', remotepath=f'{self.remote_path.replace('\\', '/')}')
-
-
- if __name__ == '__main__':
-     bp = ByPy()
-     bp.list()
mdbq/config/set_support.py DELETED
@@ -1,20 +0,0 @@
- # -*- coding: UTF-8 –*-
- import platform
- import getpass
- import os
- import sys
-
- """
- 专门用来设置 support 文件夹路径
- support 文件夹包含很多配置类文件,是程序必不可少的依赖
- """
-
-
- class SetSupport:
-     def __init__(self, dirname):
-         self.dirname = os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), dirname)
-
-
- if __name__ == '__main__':
-     s = SetSupport(dirname='support').dirname
-     print(s)
mdbq/dataframe/__init__.py DELETED
@@ -1,4 +0,0 @@
-
-
-
- # dataframe 优化
mdbq/dataframe/converter.py DELETED
@@ -1,107 +0,0 @@
- # -*- coding:utf-8 -*-
- import pandas as pd
- import numpy as np
- from decimal import Decimal
- import re
-
-
- class DataFrameConverter(object):
-     def __init__(self, df=pd.DataFrame({})):
-         self.df = df
-
-     def convert_df_cols(self, df=pd.DataFrame({})):
-         """
-         清理 dataframe 非法值
-         对数据类型进行转换(尝试将 object 类型转为 int 或 float)
-         """
-         if len(df) == 0:
-             df = self.df
-             if len(df) == 0:
-                 return
-
-         def find_longest_decimal_value(number_list):
-             # 取列表中小数位数最长的值
-             longest_value = None
-             max_decimals = 0
-             for num in number_list:
-                 decimal_places = len(str(num).split('.')[1])
-                 if decimal_places > max_decimals:
-                     max_decimals = decimal_places
-                     longest_value = num
-             return longest_value
-
-         # dtypes = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
-         df.replace([np.inf, -np.inf], '0', inplace=True) # 清理一些非法值
-         # df.replace(to_replace=['\\N', '-', '--', '', 'nan', 'NAN'], value='0', regex=False, inplace=True) # 替换掉特殊字符
-         df.replace(to_replace=['\\N', '', 'nan', 'NAN'], value='0', regex=False, inplace=True) # 替换掉特殊字符
-         # df.replace(to_replace=[','], value='', regex=True, inplace=True)
-         df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
-         df.replace(to_replace=['"'], value='', regex=True, inplace=True)
-         cols = df.columns.tolist()
-
-         df.reset_index(inplace=True, drop=True) # 重置索引,避免下面的 df.loc[0, col] 会出错
-
-         for col in cols:
-             if col.lower() == 'id':
-                 df.pop(col) # 等待插入的 df 不能包含 id 列,否则可能跟现有 id 主键冲突
-                 continue
-
-             try:
-                 # 百分比在某些数据库中不兼容, 转换百分比为小数, # 转百分比的列不能含有中文或特殊字符
-                 df[col] = df[col].apply(
-                     lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%$', str(x)) else x)
-             except Exception as e:
-                 print(f'留意错误信息: 位于列 -> {col} -> {e}')
-
-             if (col.endswith('占比') or col.endswith('率') or col.endswith('同比')
-                     or col.endswith('环比') or col.lower().endswith('roi')
-                     or col.endswith('产出比')):
-                 df = df.astype({col: 'float64'}, errors='raise')
-
-             # 尝试转换合适的数据类型
-             if df[col].dtype == 'object':
-                 # "_"符号会被错误识别
-                 try:
-                     # 不能直接使用 int() ,对于大数,可能转为uint64,导致数据库入库可能异常
-                     df[col] = df[col].apply(
-                         lambda x: np.int64(str(x)) if '_' not in str(x) and '.' not in str(x) else x) # 不含小数点尝试转整数
-                     # df[col] = df[col].apply(lambda x: int(x) if '_' not in str(x) and '.' not in str(x) else x) # 不含小数点尝试转整数
-                 except:
-                     pass
-             if df[col].dtype == 'object':
-                 try:
-                     df[col] = df[col].apply(lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
-                 except:
-                     pass
-             if df[col].dtype == 'float' or df[col].dtype == 'float64': # 对于小数类型, 保留 6 位小数
-                 df[col] = df[col].fillna(0.0).apply(lambda x: round(x, 6))
-
-             # 转换日期样式的列为日期类型
-             value = df.loc[0, col]
-             if value:
-                 res = re.match(r'\d{4}-\d{2}-\d{2}|\d{4}-\d{2}-\d{2} |\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
-                                r'|\d{4}/\d{1}/\d{1}|\d{4}/\d{1}/\d{2}|\d{4}/\d{2}/\d{1}|\d{4}/\d{2}/\d{2}', str(value))
-                 if res:
-                     try:
-                         df[col] = df[col].apply(lambda x: pd.to_datetime(x))
-                     except:
-                         pass
-             new_col = col.lower()
-             new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
-             new_col = new_col.replace(')', '')
-             new_col = re.sub(r'_{2,}', '_', new_col)
-             new_col = re.sub(r'_+$', '', new_col)
-             df.rename(columns={col: new_col}, inplace=True)
-         df.fillna(0, inplace=True)
-         return df
-
-
- if __name__ == '__main__':
-     # df = pd.DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'])
-     # converter = DataFrameConverter()
-     # df = converter.convert_df_cols(df)
-     # print(df['a'].dtype)
-     # print(df)
-     pattern = '1540%'
-     pattern = re.findall(r'^\d+\.?\d*%$', pattern)
-     print(pattern)
mdbq/log/mylogger.py DELETED
@@ -1,66 +0,0 @@
- import logging
- from logging import Logger
- from logging import handlers
-
-
- class MyLogger(Logger):
-     """
-     从Logger类中继承,实例化一个日志器
-     """
-     def __init__(self, logger_name, level='INFO', is_stream_handler=True, file=None, debug_file=None,
-                  max_bytes=False, back_count=10, when=None):
-         """
-         :param logger_name: 日志器的名字
-         :param level: 日志级别 # DEBUG INFO WARNING ERROR CRITICAL
-         :param is_stream_handler: 默认True输出到控制台
-         :param file: 传入文件名,默认None不输出到 file
-         param debug_file: 传入文件名,记录详细debug时使用,默认None不输出, 尽量不要和file同时使用,会重复写
-         :param when: 按周期分割日志,默认不分割,除非指定其他值
-         :param max_bytes: 按文件大小分割日志
-         :param back_count: 保留日志的数量, 值从0开始
-         """
-         # 设置日志器名字、级别
-         super().__init__(logger_name, level)
-
-         # 定义日志格式, 使用Formatter类实例化一个日志类
-         fmt_stream = "%(asctime)s %(levelname)s %(name)s: %(message)s"
-         fmt_file = "%(asctime)s %(name)s: %(message)s"
-         fmt_debug_file = "%(asctime)s %(levelname)s %(name)s %(funcName)s: %(message)s"
-         formatter_stream = logging.Formatter(fmt_stream, datefmt="%Y-%m-%d %H:%M:%S")
-         formatter_file = logging.Formatter(fmt_file, datefmt="%Y-%m-%d %H:%M:%S")
-         formatter_debug_file = logging.Formatter(fmt_debug_file, datefmt="%Y-%m-%d %H:%M:%S")
-
-         # 创建一个handler,默认输出到控制台,如果设置为False,日志将不输出到控制台
-         if is_stream_handler:
-             stream_handler = logging.StreamHandler() # 设置渠道当中的日志格式
-             stream_handler.setFormatter(formatter_stream) # 将渠道与实例日志器绑定
-             self.addHandler(stream_handler)
-
-         # 创建一个handler,输出到文件file
-         if file:
-             file_handle = logging.FileHandler(file, mode='a', encoding='utf-8')
-             file_handle.setFormatter(formatter_file)
-             self.addHandler(file_handle)
-
-         # 创建一个handler,输出到文件file,记录详细的debug信息
-         if debug_file:
-             debug_file_handle = logging.FileHandler(debug_file, mode='a', encoding='utf-8')
-             debug_file_handle.setFormatter(formatter_debug_file)
-             self.addHandler(debug_file_handle)
-
-         # 创建一个handler,按日志文件大小分割
-         if max_bytes:
-             formatter_ = logging.Formatter(fmt='%(asctime)s %(name)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
-             formatter_time = handlers.RotatingFileHandler(filename='日志_分割.txt', encoding='utf-8',
-                                                           maxBytes=max_bytes, backupCount=back_count)
-             formatter_time.setLevel(level)
-             formatter_time.setFormatter(formatter_)
-             self.addHandler(formatter_time)
-
-         # 创建一个handler,按指定周期分割日志
-         if when:
-             pass
-
-
- if __name__ == '__main__':
-     pass
mdbq/mongo/__init__.py DELETED
@@ -1,4 +0,0 @@
-
-
-
- # mongodb
mdbq/mysql/year_month_day.py DELETED
@@ -1,38 +0,0 @@
- # -*- coding:utf-8 -*-
- import warnings
- import pandas as pd
- import calendar
-
- warnings.filterwarnings('ignore')
-
-
- def year_month_day(start_date, end_date):
-     """
-     使用date_range函数和DataFrame来获取从start_date至end_date之间的所有年月日
-     calendar.monthrange: 获取当月第一个工作日的星期值(0,6) 以及当月天数
-     返回值: [{'起始日期': '2025-05-01', '结束日期': '2025-05-31'}, {'起始日期': '2025-06-01', '结束日期': '2025-06-30'}]
-     """
-     # 替换年月日中的日, 以便即使传入当月日期也有返回值
-     try:
-         start_date = f'{pd.to_datetime(start_date).year}-{pd.to_datetime(start_date).month}-01'
-     except Exception as e:
-         print(e)
-         return []
-     # 使用pandas的date_range创建一个日期范围,频率为'MS'代表每月开始
-     date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
-     # 转换格式
-     year_months = date_range.strftime('%Y-%m').drop_duplicates().sort_values()
-
-     results = []
-     for year_month in year_months:
-         year = re.findall(r'(\d{4})', year_month)[0]
-         month = re.findall(r'\d{4}-(\d{2})', year_month)[0]
-         s, d = calendar.monthrange(int(year), int(month))
-         results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{d}'})
-
-     return results # start_date至end_date之间的所有年月日
-
-
- if __name__ == '__main__':
-     results = year_month_day(start_date='2025-05-01', end_date='2025-08-01')
-     print(results)
mdbq/other/porxy.py DELETED
@@ -1,115 +0,0 @@
- import requests
- import kdl
- import warnings
- import os
- import requests
- import datetime
- import re
- import time
- import socket
- warnings.filterwarnings('ignore')
- """
- 需要传入 订单的 secret_id 和 secret_key
- """
-
-
- class MyProxy(object):
-
-     def __init__(self, secret_id, secret_key):
-         self.secret_id = secret_id
-         self.secret_key = secret_key
-         self.cookie_path = 'cookies'
-         if not os.path.exists(self.cookie_path):
-             os.mkdir(self.cookie_path)
-
-     def get_proxy(self):
-         """
-         从代理网站获取代理ip, 默认参数是文件位置,不需要修改
-         """
-         secret_id = self.secret_id
-         secret_key = self.secret_key
-         cookie_path = self.cookie_path
-         headers = {
-             "User-Agent": 'Mozilla/5.0'
-         }
-         auth = kdl.Auth(secret_id=secret_id, secret_key=secret_key)
-         client = kdl.Client(auth)
-
-         def ip_address():
-             try:
-                 _response = requests.get("https://api.ipify.org/?format=json")
-                 _ip = _response.json()["ip"]
-             except:
-                 _ip = ''
-             return str(_ip)
-
-         myip_path = f'{cookie_path}/本机ip_{socket.gethostname()}.txt' # 将本机地址保存本地, 下次直接使用, 避免获取失败
-         if os.path.exists(myip_path):
-             file_timestamp = os.path.getmtime(myip_path)
-             file_date = datetime.datetime.fromtimestamp(file_timestamp).strftime('%Y-%m-%d')
-             today_date = datetime.datetime.today().strftime('%Y-%m-%d')
-             if file_date == today_date:
-                 with open(myip_path) as m:
-                     my_ip = m.read().strip()
-             else:
-                 my_ip = ip_address()
-                 with open(f'{cookie_path}/本机ip_{socket.gethostname()}.txt', 'w') as f:
-                     f.write(my_ip)
-         else:
-             my_ip = ip_address()
-             with open(f'{cookie_path}/本机ip_{socket.gethostname()}.txt', 'w') as f:
-                 f.write(my_ip)
-         try:
-             ip_whitelist = client.get_ip_whitelist() # 检查ip白名单, 如果这句报错,就直接设置白名单
-             if my_ip not in ip_whitelist:
-                 ip_whitelist.append(my_ip)
-                 client.set_ip_whitelist(ip_whitelist) # 添加本机到白名单
-         except Exception as e:
-             print(e)
-             client.set_ip_whitelist(my_ip) # 设置本机到白名单,会清空其他ip
-
-         if not os.path.isfile(f'{cookie_path}/secret_token_{socket.gethostname()}.txt'): # 如果本地没有密钥令牌则创建
-             secret_token = client.get_secret_token()
-             with open(f'{cookie_path}/secret_token_{socket.gethostname()}.txt', 'w') as f:
-                 f.write(secret_token)
-         else:
-             with open(f'{cookie_path}/secret_token_{socket.gethostname()}.txt', 'r') as f:
-                 secret_token = f.read()
-             data = f'secret_id={secret_id}&secret_token={secret_token}' # 检查密钥令牌的有效时长
-             token_expire = requests.post(
-                 'https://dev.kdlapi.com/api/check_secret_token',
-                 data, headers=headers).json()['data']['expire']
-             if token_expire < 300: # token_expire 密钥令牌距离过期的剩余时长(单位:秒),不足5分钟则重新创建令牌
-                 secret_token = client.get_secret_token()
-                 with open(f'{cookie_path}/secret_token_{socket.gethostname()}.txt', 'w') as f:
-                     f.write(secret_token)
-         # api地址
-         proxy_url = (f'https://dev.kdlapi.com/api/getdps/?'
-                      f'secret_id={secret_id}'
-                      f'&signature={secret_token}'
-                      f'&num=1&pt=1&format=text&sep=1&f_loc=1&f_citycode=1&area=440100')
-         # expire_time = client.get_order_expire_time() # 账户有效期
-         _proxy = requests.get(proxy_url, headers=headers).text # 通过api地址获取代理ip
-         ip_times = client.get_dps_valid_time(proxy=_proxy).values() # ip有效时间
-         for t in ip_times:
-             if str(t) != '0':
-                 ip_times = t
-         balance = client.get_ip_balance(sign_type='hmacsha1') # 可用ip余额
-         d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-         ip_proxy = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', _proxy)[0]
-         city_proxy = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+,([\u4e00-\u9fa5]+),', _proxy)[0]
-         ip_port = ip_proxy.split(':')
-         content = (f'{d_time} 中转IP:{ip_port[0]}, '
-                    f'端口:{ip_port[1]}, '
-                    f'出口地址:{city_proxy}, '
-                    f'ip时长:{ip_times}秒, '
-                    f'可用ip余额:{balance}, '
-                    )
-         # print(content)
-         with open(f'{cookie_path}/代理ip地址.txt', 'a', encoding='utf-8') as f:
-             f.write(content)
-         return ip_proxy
-
-
- if __name__ == '__main__':
-     cookie_path = 'cookies'