oafuncs 0.0.98.3__py3-none-any.whl → 0.0.98.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,9 @@ SystemInfo: Windows 11
13
13
  Python Version: 3.12
14
14
  """
15
15
 
16
+ import asyncio
16
17
  import datetime
18
+ import logging
17
19
  import os
18
20
  import random
19
21
  import re
@@ -23,11 +25,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
23
25
  from pathlib import Path
24
26
  from threading import Lock
25
27
 
28
+ import httpx
26
29
  import matplotlib.pyplot as plt
27
30
  import netCDF4 as nc
28
31
  import numpy as np
29
32
  import pandas as pd
30
- import requests
31
33
  import xarray as xr
32
34
  from rich import print
33
35
  from rich.progress import Progress
@@ -38,6 +40,9 @@ from oafuncs.oa_file import file_size
38
40
  from oafuncs.oa_nc import check as check_nc
39
41
  from oafuncs.oa_nc import modify as modify_nc
40
42
 
43
+ logging.getLogger("httpx").setLevel(logging.WARNING) # 关闭 httpx 的 INFO 日志,只显示 WARNING 及以上
44
+
45
+
41
46
  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
42
47
 
43
48
  __all__ = ["draw_time_range", "download"]
@@ -414,13 +419,13 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
414
419
  trange_list.append(f"{time_s}-{time_e}")
415
420
  have_data = True
416
421
 
417
- # 输出结果
418
- if match_time is None:
419
- print(f"[bold red]{time_input_str} is in the following dataset and version:")
420
422
  if have_data:
421
423
  if match_time is None:
424
+ print(f"[bold red]Time {time_input_str} included in:")
425
+ dv_num = 1
422
426
  for d, v, trange in zip(d_list, v_list, trange_list):
423
- print(f"[bold blue]{d} {v} {trange}")
427
+ print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
428
+ dv_num += 1
424
429
  if is_single_time:
425
430
  return True
426
431
  else:
@@ -432,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
432
437
  print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
433
438
  return False
434
439
  else:
435
- print(f"[bold red]{time_input_str} is not in any dataset and version")
440
+ print(f"[bold red]Time {time_input_str} has no data")
436
441
  return False
437
442
 
438
443
 
@@ -507,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
507
512
 
508
513
  if dataset_name_out is not None and version_name_out is not None:
509
514
  if match_time is None:
510
- print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
515
+ # print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
516
+ print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
511
517
 
512
518
  # 如果没有找到匹配的数据集和版本,会返回 None
513
519
  return dataset_name_out, version_name_out
@@ -611,7 +617,7 @@ def _get_mean_size_move(same_file, current_file):
611
617
  size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
612
618
 
613
619
  if abs(size_difference_ratio) > tolerance_ratio:
614
- if check_nc(current_file,print_messages=False):
620
+ if check_nc(current_file, print_messages=False):
615
621
  fsize_dict[same_file]["size_list"] = [current_file_size]
616
622
  fsize_dict[same_file]["mean_size"] = current_file_size
617
623
  else:
@@ -662,110 +668,122 @@ def _correct_time(nc_file):
662
668
  modify_nc(nc_file, "time", None, time_difference)
663
669
 
664
670
 
671
+ class _HycomDownloader:
672
+ def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
673
+ """
674
+ :param tasks: List of (url, save_path)
675
+ """
676
+ self.tasks = tasks
677
+ self.delay_range = delay_range
678
+ self.timeout_factor = timeout_factor
679
+ self.max_var_count = max_var_count
680
+ self.max_retries = max_retries
681
+ self.count = {"success": 0, "fail": 0}
682
+
683
+ def user_agent(self):
684
+ return get_ua()
685
+
686
+ async def _download_one(self, url, save_path):
687
+ file_name = os.path.basename(save_path)
688
+ headers = {"User-Agent": self.user_agent()}
689
+ var_count = min(max(url.count("var="), 1), self.max_var_count)
690
+ timeout_max = self.timeout_factor * var_count
691
+ timeout = random.randint(timeout_max // 2, timeout_max)
692
+
693
+ retry = 0
694
+ while retry <= self.max_retries:
695
+ try:
696
+ await asyncio.sleep(random.uniform(*self.delay_range))
697
+ start = datetime.datetime.now()
698
+ async with httpx.AsyncClient(
699
+ timeout=httpx.Timeout(timeout),
700
+ limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
701
+ transport=httpx.AsyncHTTPTransport(retries=2),
702
+ ) as client:
703
+ print(f"[bold #f0f6d0]Requesting {file_name} (Attempt {retry + 1}) ...")
704
+ response = await client.get(url, headers=headers, follow_redirects=True)
705
+ response.raise_for_status()
706
+ if not response.content:
707
+ raise ValueError("Empty response received")
708
+
709
+ print(f"[bold #96cbd7]Downloading {file_name} ...")
710
+ with open(save_path, "wb") as f:
711
+ async for chunk in response.aiter_bytes(32 * 1024):
712
+ f.write(chunk)
713
+
714
+ elapsed = datetime.datetime.now() - start
715
+ print(f"[#3dfc40]File [bold #dfff73]{file_name} [#3dfc40]downloaded, Time: [#39cbdd]{elapsed}")
716
+ self.count["success"] += 1
717
+ count_dict["success"] += 1
718
+ return
719
+
720
+ except Exception as e:
721
+ print(f"[bold red]Failed ({type(e).__name__}): {e}")
722
+ if retry < self.max_retries:
723
+ backoff = 2**retry
724
+ print(f"[yellow]Retrying in {backoff:.1f}s ...")
725
+ await asyncio.sleep(backoff)
726
+ retry += 1
727
+ else:
728
+ print(f"[red]Giving up on {file_name}")
729
+ self.count["fail"] += 1
730
+ count_dict["fail"] += 1
731
+ return
732
+
733
+ async def run(self):
734
+ print(f"📥 Starting download of {len(self.tasks)} files ...")
735
+ for url, save_path in self.tasks:
736
+ await self._download_one(url, save_path)
737
+
738
+ print("\n✅ All tasks completed.")
739
+ print(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
740
+
741
+
665
742
  def _download_file(target_url, store_path, file_name, cover=False):
666
- fname = Path(store_path) / file_name
743
+ save_path = Path(store_path) / file_name
667
744
  file_name_split = file_name.split("_")
668
745
  file_name_split = file_name_split[:-1]
669
746
  same_file = "_".join(file_name_split) + "*nc"
670
747
 
671
748
  if match_time is not None:
672
- if check_nc(fname, print_messages=False):
673
- if not _check_ftime(fname, if_print=True):
749
+ if check_nc(save_path, print_messages=False):
750
+ if not _check_ftime(save_path, if_print=True):
674
751
  if match_time:
675
- _correct_time(fname)
752
+ _correct_time(save_path)
676
753
  count_dict["skip"] += 1
677
754
  else:
678
- _clear_existing_file(fname)
755
+ _clear_existing_file(save_path)
679
756
  count_dict["no_data"] += 1
680
757
  else:
681
758
  count_dict["skip"] += 1
682
759
  print(f"[bold green]{file_name} is correct")
683
760
  return
684
761
 
685
- if not cover and os.path.exists(fname):
686
- print(f"[bold #FFA54F]{fname} exists, skipping ...")
762
+ if not cover and os.path.exists(save_path):
763
+ print(f"[bold #FFA54F]{save_path} exists, skipping ...")
687
764
  count_dict["skip"] += 1
688
765
  return
689
766
 
690
767
  if same_file not in fsize_dict.keys():
691
- check_nc(fname, delete_if_invalid=True, print_messages=False)
768
+ check_nc(save_path, delete_if_invalid=True, print_messages=False)
692
769
 
693
- get_mean_size = _get_mean_size_move(same_file, fname)
770
+ get_mean_size = _get_mean_size_move(same_file, save_path)
694
771
 
695
- if _check_existing_file(fname, get_mean_size):
772
+ if _check_existing_file(save_path, get_mean_size):
696
773
  count_dict["skip"] += 1
697
774
  return
698
775
 
699
- _clear_existing_file(fname)
776
+ _clear_existing_file(save_path)
700
777
 
701
778
  if not use_idm:
702
- print(f"[bold #f0f6d0]Requesting {file_name} ...")
703
- s = requests.Session()
704
- download_success = False
705
- request_times = 0
706
-
707
- def calculate_wait_time(time_str, target_url):
708
- time_pattern = r"\d{10}"
709
- times_in_str = re.findall(time_pattern, time_str)
710
- num_times_str = len(times_in_str)
711
-
712
- if num_times_str > 1:
713
- delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
714
- delta_t = delta_t.total_seconds() / 3600
715
- delta_t = delta_t / 3 + 1
716
- else:
717
- delta_t = 1
718
- num_var = int(target_url.count("var="))
719
- if num_var <= 0:
720
- num_var = 1
721
- return int(delta_t * 5 * 60 * num_var)
722
-
723
- max_timeout = calculate_wait_time(file_name, target_url)
724
- print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
725
-
726
- download_time_s = datetime.datetime.now()
727
- order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
728
- while not download_success:
729
- if request_times >= 10:
730
- print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
731
- count_dict["fail"] += 1
732
- break
733
- if request_times > 0:
734
- print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
735
- try:
736
- headers = {"User-Agent": get_ua()}
737
- response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
738
- response.raise_for_status()
739
- with open(fname, "wb") as f:
740
- print(f"[bold #96cbd7]Downloading {file_name} ...")
741
- for chunk in response.iter_content(chunk_size=1024):
742
- if chunk:
743
- f.write(chunk)
744
-
745
- f.close()
746
-
747
- if os.path.exists(fname):
748
- download_success = True
749
- download_time_e = datetime.datetime.now()
750
- download_delta = download_time_e - download_time_s
751
- print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
752
- count_dict["success"] += 1
753
-
754
- except requests.exceptions.HTTPError as errh:
755
- print(f"Http Error: {errh}")
756
- except requests.exceptions.ConnectionError as errc:
757
- print(f"Error Connecting: {errc}")
758
- except requests.exceptions.Timeout as errt:
759
- print(f"Timeout Error: {errt}")
760
- except requests.exceptions.RequestException as err:
761
- print(f"OOps: Something Else: {err}")
762
-
763
- time.sleep(3)
764
- request_times += 1
779
+ python_downloader = _HycomDownloader([(target_url, save_path)])
780
+ asyncio.run(python_downloader.run())
781
+ time.sleep(3 + random.uniform(0, 10))
765
782
  else:
766
783
  idm_downloader(target_url, store_path, file_name, given_idm_engine)
767
- idm_download_list.append(fname)
768
- print(f"[bold #3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been submit to IDM for downloading")
784
+ idm_download_list.append(save_path)
785
+ # print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
786
+ time.sleep(3 + random.uniform(0, 10))
769
787
 
770
788
 
771
789
  def _check_hour_is_valid(ymdh_str):
@@ -992,7 +1010,7 @@ def download(
992
1010
 
993
1011
  Returns:
994
1012
  None
995
-
1013
+
996
1014
  Example:
997
1015
  >>> download(
998
1016
  variables='u',
@@ -1088,7 +1106,7 @@ def download(
1088
1106
 
1089
1107
  if validate_time is not None:
1090
1108
  workers = 1
1091
- print('*' * mark_len)
1109
+ print("*" * mark_len)
1092
1110
  print("[bold red]Only checking the time of existing files.")
1093
1111
  bar_desc = "Checking time ..."
1094
1112
 
@@ -1158,20 +1176,20 @@ if __name__ == "__main__":
1158
1176
 
1159
1177
  options = {
1160
1178
  "variables": var_list,
1161
- "start_time": "2025010300",
1162
- "end_time": "2025010309",
1163
- "output_dir": r"I:\Data\HYCOM\3hourly_test",
1179
+ "start_time": "2018010100",
1180
+ "end_time": "2019063000",
1181
+ "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
1164
1182
  "lon_min": 105,
1165
- "lon_max": 130,
1166
- "lat_min": 15,
1183
+ "lon_max": 135,
1184
+ "lat_min": 10,
1167
1185
  "lat_max": 45,
1168
1186
  "workers": 1,
1169
1187
  "overwrite": False,
1170
1188
  "depth": None,
1171
1189
  "level": None,
1172
- "validate_time": True,
1173
- "idm_path": r'D:\Programs\Internet Download Manager\IDMan.exe',
1174
- "interval_hours": 3,
1190
+ "validate_time": None,
1191
+ # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
1192
+ "interval_hours": 24,
1175
1193
  }
1176
1194
 
1177
1195
  if single_var:
@@ -1,151 +1,40 @@
1
- #!/usr/bin/env python
2
- # coding=utf-8
3
- """
4
- Author: Liu Kun && 16031215@qq.com
5
- Date: 2024-12-01 19:32:25
6
- LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2024-12-10 11:16:36
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\test.py
9
- Description:
10
- EditPlatform: vscode
11
- ComputerInfo: XPS 15 9510
12
- SystemInfo: Windows 11
13
- Python Version: 3.12
14
- """
15
-
16
- import os
17
- import random
18
1
  import re
19
2
 
20
3
 
21
- def is_valid_user_agent(user_agent):
22
- # 简单的正则表达式来检查User Agent的格式
23
- # 这个正则表达式检查User Agent是否包含常见的浏览器信息格式
4
+ def is_valid_ua(ua):
5
+ # 更宽松的 UA 验证规则
24
6
  pattern = re.compile(
25
- r"^(?:(?:Mozilla|Opera|Chrome|Safari|Edg|OPR)/[\d.]+)"
26
- r"(?:\s(?:\(.*?\)))?"
27
- r"(?:\s(?:Gecko|AppleWebKit|KHTML, like Gecko|Version|Edge|OPR)/[\d.]+)?"
28
- r"(?:\s.*?(?:rv:|Version/|Ubuntu|Macintosh|Windows|X11|Linux|CrOS|FreeBSD|OpenBSD|NetBSD|iPhone|iPad|iPod|Android|BlackBerry|BB10|Mobile|Symbian|Windows Phone|IEMobile|Opera Mini|Opera Mobi|UCBrowser|MQQBrowser|baiduboxapp|baidubrowser|Safari|Firefox|MSIE|Trident|Edge|EdgA|Chrome|CriOS|Vivaldi|Sleipnir|Midori|ELinks|Lynx|w3m|Arora|Epiphany|Konqueror|Dillo|Netscape|SeaMonkey|K-Meleon|Camino|Iceape|Galeon|GranParadiso|Iceweasel|Firefox|Fennec|Conkeror|PaleMoon|Uzbl|QupZilla|Otter|Waterfox|Basilisk|Cyberfox|PaleMoon|GNU IceCat|GNU IceWeasel|IceCat|IceWeasel|Seamonkey|Iceape|Firefox|Epiphany|Web|Safari|Android|Mobile|BlackBerry|BB10|Tablet|Silk|Kindle|FxiOS|Focus|SamsungBrowser|browser|AppleWebKit|Puffin|DuckDuckGo|YaBrowser|Yandex|Amigo|NokiaBrowser|OviBrowser|OneBrowser|Chrome|Firefox|Safari|OPR|Coast|Mercury|Silk|Skyfire|IEMobile|Bolt|Jasmine|NativeHost|Crosswalk|TizenBrowser|SailfishBrowser|SamsungBrowser|Silk-Accelerated|UCBrowser|Quark|XiaoMi|OnePlus|Vivo|Oppo|Realme|Meizu|Lenovo|Huawei|ZTE|Alcatel|Sony|Nokia|LG|HTC|Asus|Acer|Motorola|Samsung)/[\d.]+)?$"
7
+ r"""
8
+ ^Mozilla/(4\.0|5\.0) # 必须以 Mozilla/4.0 或 5.0 开头
9
+ \s+ # 空格
10
+ \(.*?\) # 操作系统信息
11
+ \s+ # 空格
12
+ (AppleWebKit/|Gecko/|Trident/|Version/|Edge/)? # 浏览器引擎或版本标识(可选)
13
+ \d+(\.\d+)* # 至少一个版本号(小数部分可选)
14
+ .* # 允许后续扩展信息
15
+ $ # 行尾
16
+ """,
17
+ re.VERBOSE,
29
18
  )
19
+ return re.match(pattern, ua.strip()) is not None
30
20
 
31
- # 使用正则表达式匹配User Agent字符串
32
- if pattern.match(user_agent):
33
- return True
34
- else:
35
- return False
36
-
37
-
38
- def get_ua():
39
- current_dir = os.path.dirname(os.path.abspath(__file__))
40
- ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
41
-
42
- with open(ua_file_txt, "r") as f:
43
- ua_list = f.readlines()
44
- # 去掉换行符和空行
45
- ua_list = [line.strip() for line in ua_list if line.strip()]
46
21
 
47
- return random.choice(ua_list)
22
+ def main():
23
+ input_file = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list-old.txt"
24
+ output_file = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list.txt"
48
25
 
26
+ valid_uas = []
27
+ with open(input_file, "r", encoding="utf-8") as f:
28
+ for line in f:
29
+ line = line.strip()
30
+ if line and is_valid_ua(line):
31
+ valid_uas.append(line)
49
32
 
50
- def get_ua_org():
51
- ua_list = [
52
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
53
- "Opera/8.0 (Windows NT 5.1; U; en)",
54
- "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
55
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
56
- "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
57
- "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
58
- "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
59
- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
60
- "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
61
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
62
- "Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
63
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
64
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
65
- "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
66
- "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
67
- "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
68
- "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
69
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
70
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
71
- "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
72
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
73
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
74
- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
75
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
76
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
77
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
78
- "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
79
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
80
- "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
81
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
82
- "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
83
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
84
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
85
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
86
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
87
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
88
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
89
- "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
90
- "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
91
- "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
92
- "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
93
- "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
94
- "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
95
- "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
96
- "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
97
- "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
98
- "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
99
- "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
100
- "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
101
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
102
- "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
103
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
104
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
105
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
106
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
107
- "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
108
- "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
109
- "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
110
- "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
111
- "UCWEB7.0.2.37/28/999",
112
- "NOKIA5700/UCWEB7.0.2.37/28/999",
113
- "Openwave/UCWEB7.0.2.37/28/999",
114
- "Openwave/UCWEB7.0.2.37/28/999",
115
- ]
116
- with open(newtxtfile, "w") as f:
117
- for line in ua_list:
118
- f.write(line + "\n")
119
- # print(f'Using User-Agent: {ua}')
120
- ua = random.choice(ua_list)
121
- return ua
33
+ with open(output_file, "w", encoding="utf-8") as f:
34
+ f.write("\n".join(valid_uas))
122
35
 
36
+ print(f"[Linux 兼容模式] 有效UA已保存到 {output_file},共 {len(valid_uas)} 条")
123
37
 
124
- # get_ua_org()
125
38
 
126
39
  if __name__ == "__main__":
127
- txtfile = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list.txt"
128
-
129
- with open(txtfile, "r") as f:
130
- lines = f.readlines()
131
- # 去掉换行符和空行
132
- lines = [line.strip() for line in lines if line.strip()]
133
- """ new_line = []
134
- for i in range(len(lines)):
135
- if '/' in lines[i]:
136
- new_line.append(lines[i])
137
- else:
138
- print(lines[i]) """
139
-
140
- new_line = []
141
- for line in lines:
142
- if is_valid_user_agent(line):
143
- # print(line)
144
- new_line.append(line)
145
- else:
146
- print(f"Invalid User-Agent: {line}")
147
-
148
- newtxtfile = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\ua_list_new.txt"
149
- with open(newtxtfile, "w") as f:
150
- for line in new_line:
151
- f.write(line + "\n")
40
+ main()