oafuncs 0.0.98.3__py3-none-any.whl → 0.0.98.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/parallel.py +158 -509
- oafuncs/_script/parallel_test.py +14 -0
- oafuncs/oa_down/User_Agent-list.txt +1 -1611
- oafuncs/oa_down/hycom_3hourly.py +112 -94
- oafuncs/oa_down/test_ua.py +27 -138
- oafuncs/oa_tool.py +118 -30
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.5.dist-info}/METADATA +2 -1
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.5.dist-info}/RECORD +11 -12
- oafuncs/_script/parallel_example_usage.py +0 -83
- oafuncs/oa_down/hycom_3hourly_20250407.py +0 -1295
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.5.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.5.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.5.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -13,7 +13,9 @@ SystemInfo: Windows 11
|
|
13
13
|
Python Version: 3.12
|
14
14
|
"""
|
15
15
|
|
16
|
+
import asyncio
|
16
17
|
import datetime
|
18
|
+
import logging
|
17
19
|
import os
|
18
20
|
import random
|
19
21
|
import re
|
@@ -23,11 +25,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
23
25
|
from pathlib import Path
|
24
26
|
from threading import Lock
|
25
27
|
|
28
|
+
import httpx
|
26
29
|
import matplotlib.pyplot as plt
|
27
30
|
import netCDF4 as nc
|
28
31
|
import numpy as np
|
29
32
|
import pandas as pd
|
30
|
-
import requests
|
31
33
|
import xarray as xr
|
32
34
|
from rich import print
|
33
35
|
from rich.progress import Progress
|
@@ -38,6 +40,9 @@ from oafuncs.oa_file import file_size
|
|
38
40
|
from oafuncs.oa_nc import check as check_nc
|
39
41
|
from oafuncs.oa_nc import modify as modify_nc
|
40
42
|
|
43
|
+
logging.getLogger("httpx").setLevel(logging.WARNING) # 关闭 httpx 的 INFO 日志,只显示 WARNING 及以上
|
44
|
+
|
45
|
+
|
41
46
|
warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
|
42
47
|
|
43
48
|
__all__ = ["draw_time_range", "download"]
|
@@ -414,13 +419,13 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
|
|
414
419
|
trange_list.append(f"{time_s}-{time_e}")
|
415
420
|
have_data = True
|
416
421
|
|
417
|
-
# 输出结果
|
418
|
-
if match_time is None:
|
419
|
-
print(f"[bold red]{time_input_str} is in the following dataset and version:")
|
420
422
|
if have_data:
|
421
423
|
if match_time is None:
|
424
|
+
print(f"[bold red]Time {time_input_str} included in:")
|
425
|
+
dv_num = 1
|
422
426
|
for d, v, trange in zip(d_list, v_list, trange_list):
|
423
|
-
print(f"[bold blue]{d} {v} {trange}")
|
427
|
+
print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
|
428
|
+
dv_num += 1
|
424
429
|
if is_single_time:
|
425
430
|
return True
|
426
431
|
else:
|
@@ -432,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
|
|
432
437
|
print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
|
433
438
|
return False
|
434
439
|
else:
|
435
|
-
print(f"[bold red]{time_input_str}
|
440
|
+
print(f"[bold red]Time {time_input_str} has no data")
|
436
441
|
return False
|
437
442
|
|
438
443
|
|
@@ -507,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
|
|
507
512
|
|
508
513
|
if dataset_name_out is not None and version_name_out is not None:
|
509
514
|
if match_time is None:
|
510
|
-
print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
|
515
|
+
# print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
|
516
|
+
print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
|
511
517
|
|
512
518
|
# 如果没有找到匹配的数据集和版本,会返回 None
|
513
519
|
return dataset_name_out, version_name_out
|
@@ -611,7 +617,7 @@ def _get_mean_size_move(same_file, current_file):
|
|
611
617
|
size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
|
612
618
|
|
613
619
|
if abs(size_difference_ratio) > tolerance_ratio:
|
614
|
-
if check_nc(current_file,print_messages=False):
|
620
|
+
if check_nc(current_file, print_messages=False):
|
615
621
|
fsize_dict[same_file]["size_list"] = [current_file_size]
|
616
622
|
fsize_dict[same_file]["mean_size"] = current_file_size
|
617
623
|
else:
|
@@ -662,110 +668,122 @@ def _correct_time(nc_file):
|
|
662
668
|
modify_nc(nc_file, "time", None, time_difference)
|
663
669
|
|
664
670
|
|
671
|
+
class _HycomDownloader:
|
672
|
+
def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
|
673
|
+
"""
|
674
|
+
:param tasks: List of (url, save_path)
|
675
|
+
"""
|
676
|
+
self.tasks = tasks
|
677
|
+
self.delay_range = delay_range
|
678
|
+
self.timeout_factor = timeout_factor
|
679
|
+
self.max_var_count = max_var_count
|
680
|
+
self.max_retries = max_retries
|
681
|
+
self.count = {"success": 0, "fail": 0}
|
682
|
+
|
683
|
+
def user_agent(self):
|
684
|
+
return get_ua()
|
685
|
+
|
686
|
+
async def _download_one(self, url, save_path):
|
687
|
+
file_name = os.path.basename(save_path)
|
688
|
+
headers = {"User-Agent": self.user_agent()}
|
689
|
+
var_count = min(max(url.count("var="), 1), self.max_var_count)
|
690
|
+
timeout_max = self.timeout_factor * var_count
|
691
|
+
timeout = random.randint(timeout_max // 2, timeout_max)
|
692
|
+
|
693
|
+
retry = 0
|
694
|
+
while retry <= self.max_retries:
|
695
|
+
try:
|
696
|
+
await asyncio.sleep(random.uniform(*self.delay_range))
|
697
|
+
start = datetime.datetime.now()
|
698
|
+
async with httpx.AsyncClient(
|
699
|
+
timeout=httpx.Timeout(timeout),
|
700
|
+
limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
|
701
|
+
transport=httpx.AsyncHTTPTransport(retries=2),
|
702
|
+
) as client:
|
703
|
+
print(f"[bold #f0f6d0]Requesting {file_name} (Attempt {retry + 1}) ...")
|
704
|
+
response = await client.get(url, headers=headers, follow_redirects=True)
|
705
|
+
response.raise_for_status()
|
706
|
+
if not response.content:
|
707
|
+
raise ValueError("Empty response received")
|
708
|
+
|
709
|
+
print(f"[bold #96cbd7]Downloading {file_name} ...")
|
710
|
+
with open(save_path, "wb") as f:
|
711
|
+
async for chunk in response.aiter_bytes(32 * 1024):
|
712
|
+
f.write(chunk)
|
713
|
+
|
714
|
+
elapsed = datetime.datetime.now() - start
|
715
|
+
print(f"[#3dfc40]File [bold #dfff73]{file_name} [#3dfc40]downloaded, Time: [#39cbdd]{elapsed}")
|
716
|
+
self.count["success"] += 1
|
717
|
+
count_dict["success"] += 1
|
718
|
+
return
|
719
|
+
|
720
|
+
except Exception as e:
|
721
|
+
print(f"[bold red]Failed ({type(e).__name__}): {e}")
|
722
|
+
if retry < self.max_retries:
|
723
|
+
backoff = 2**retry
|
724
|
+
print(f"[yellow]Retrying in {backoff:.1f}s ...")
|
725
|
+
await asyncio.sleep(backoff)
|
726
|
+
retry += 1
|
727
|
+
else:
|
728
|
+
print(f"[red]Giving up on {file_name}")
|
729
|
+
self.count["fail"] += 1
|
730
|
+
count_dict["fail"] += 1
|
731
|
+
return
|
732
|
+
|
733
|
+
async def run(self):
|
734
|
+
print(f"📥 Starting download of {len(self.tasks)} files ...")
|
735
|
+
for url, save_path in self.tasks:
|
736
|
+
await self._download_one(url, save_path)
|
737
|
+
|
738
|
+
print("\n✅ All tasks completed.")
|
739
|
+
print(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
|
740
|
+
|
741
|
+
|
665
742
|
def _download_file(target_url, store_path, file_name, cover=False):
|
666
|
-
|
743
|
+
save_path = Path(store_path) / file_name
|
667
744
|
file_name_split = file_name.split("_")
|
668
745
|
file_name_split = file_name_split[:-1]
|
669
746
|
same_file = "_".join(file_name_split) + "*nc"
|
670
747
|
|
671
748
|
if match_time is not None:
|
672
|
-
if check_nc(
|
673
|
-
if not _check_ftime(
|
749
|
+
if check_nc(save_path, print_messages=False):
|
750
|
+
if not _check_ftime(save_path, if_print=True):
|
674
751
|
if match_time:
|
675
|
-
_correct_time(
|
752
|
+
_correct_time(save_path)
|
676
753
|
count_dict["skip"] += 1
|
677
754
|
else:
|
678
|
-
_clear_existing_file(
|
755
|
+
_clear_existing_file(save_path)
|
679
756
|
count_dict["no_data"] += 1
|
680
757
|
else:
|
681
758
|
count_dict["skip"] += 1
|
682
759
|
print(f"[bold green]{file_name} is correct")
|
683
760
|
return
|
684
761
|
|
685
|
-
if not cover and os.path.exists(
|
686
|
-
print(f"[bold #FFA54F]{
|
762
|
+
if not cover and os.path.exists(save_path):
|
763
|
+
print(f"[bold #FFA54F]{save_path} exists, skipping ...")
|
687
764
|
count_dict["skip"] += 1
|
688
765
|
return
|
689
766
|
|
690
767
|
if same_file not in fsize_dict.keys():
|
691
|
-
check_nc(
|
768
|
+
check_nc(save_path, delete_if_invalid=True, print_messages=False)
|
692
769
|
|
693
|
-
get_mean_size = _get_mean_size_move(same_file,
|
770
|
+
get_mean_size = _get_mean_size_move(same_file, save_path)
|
694
771
|
|
695
|
-
if _check_existing_file(
|
772
|
+
if _check_existing_file(save_path, get_mean_size):
|
696
773
|
count_dict["skip"] += 1
|
697
774
|
return
|
698
775
|
|
699
|
-
_clear_existing_file(
|
776
|
+
_clear_existing_file(save_path)
|
700
777
|
|
701
778
|
if not use_idm:
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
request_times = 0
|
706
|
-
|
707
|
-
def calculate_wait_time(time_str, target_url):
|
708
|
-
time_pattern = r"\d{10}"
|
709
|
-
times_in_str = re.findall(time_pattern, time_str)
|
710
|
-
num_times_str = len(times_in_str)
|
711
|
-
|
712
|
-
if num_times_str > 1:
|
713
|
-
delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
|
714
|
-
delta_t = delta_t.total_seconds() / 3600
|
715
|
-
delta_t = delta_t / 3 + 1
|
716
|
-
else:
|
717
|
-
delta_t = 1
|
718
|
-
num_var = int(target_url.count("var="))
|
719
|
-
if num_var <= 0:
|
720
|
-
num_var = 1
|
721
|
-
return int(delta_t * 5 * 60 * num_var)
|
722
|
-
|
723
|
-
max_timeout = calculate_wait_time(file_name, target_url)
|
724
|
-
print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
|
725
|
-
|
726
|
-
download_time_s = datetime.datetime.now()
|
727
|
-
order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
|
728
|
-
while not download_success:
|
729
|
-
if request_times >= 10:
|
730
|
-
print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
|
731
|
-
count_dict["fail"] += 1
|
732
|
-
break
|
733
|
-
if request_times > 0:
|
734
|
-
print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
|
735
|
-
try:
|
736
|
-
headers = {"User-Agent": get_ua()}
|
737
|
-
response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
|
738
|
-
response.raise_for_status()
|
739
|
-
with open(fname, "wb") as f:
|
740
|
-
print(f"[bold #96cbd7]Downloading {file_name} ...")
|
741
|
-
for chunk in response.iter_content(chunk_size=1024):
|
742
|
-
if chunk:
|
743
|
-
f.write(chunk)
|
744
|
-
|
745
|
-
f.close()
|
746
|
-
|
747
|
-
if os.path.exists(fname):
|
748
|
-
download_success = True
|
749
|
-
download_time_e = datetime.datetime.now()
|
750
|
-
download_delta = download_time_e - download_time_s
|
751
|
-
print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
|
752
|
-
count_dict["success"] += 1
|
753
|
-
|
754
|
-
except requests.exceptions.HTTPError as errh:
|
755
|
-
print(f"Http Error: {errh}")
|
756
|
-
except requests.exceptions.ConnectionError as errc:
|
757
|
-
print(f"Error Connecting: {errc}")
|
758
|
-
except requests.exceptions.Timeout as errt:
|
759
|
-
print(f"Timeout Error: {errt}")
|
760
|
-
except requests.exceptions.RequestException as err:
|
761
|
-
print(f"OOps: Something Else: {err}")
|
762
|
-
|
763
|
-
time.sleep(3)
|
764
|
-
request_times += 1
|
779
|
+
python_downloader = _HycomDownloader([(target_url, save_path)])
|
780
|
+
asyncio.run(python_downloader.run())
|
781
|
+
time.sleep(3 + random.uniform(0, 10))
|
765
782
|
else:
|
766
783
|
idm_downloader(target_url, store_path, file_name, given_idm_engine)
|
767
|
-
idm_download_list.append(
|
768
|
-
print(f"[bold #3dfc40]File [bold #dfff73]{
|
784
|
+
idm_download_list.append(save_path)
|
785
|
+
# print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
|
786
|
+
time.sleep(3 + random.uniform(0, 10))
|
769
787
|
|
770
788
|
|
771
789
|
def _check_hour_is_valid(ymdh_str):
|
@@ -992,7 +1010,7 @@ def download(
|
|
992
1010
|
|
993
1011
|
Returns:
|
994
1012
|
None
|
995
|
-
|
1013
|
+
|
996
1014
|
Example:
|
997
1015
|
>>> download(
|
998
1016
|
variables='u',
|
@@ -1088,7 +1106,7 @@ def download(
|
|
1088
1106
|
|
1089
1107
|
if validate_time is not None:
|
1090
1108
|
workers = 1
|
1091
|
-
print(
|
1109
|
+
print("*" * mark_len)
|
1092
1110
|
print("[bold red]Only checking the time of existing files.")
|
1093
1111
|
bar_desc = "Checking time ..."
|
1094
1112
|
|
@@ -1158,20 +1176,20 @@ if __name__ == "__main__":
|
|
1158
1176
|
|
1159
1177
|
options = {
|
1160
1178
|
"variables": var_list,
|
1161
|
-
"start_time": "
|
1162
|
-
"end_time": "
|
1163
|
-
"output_dir": r"
|
1179
|
+
"start_time": "2018010100",
|
1180
|
+
"end_time": "2019063000",
|
1181
|
+
"output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
|
1164
1182
|
"lon_min": 105,
|
1165
|
-
"lon_max":
|
1166
|
-
"lat_min":
|
1183
|
+
"lon_max": 135,
|
1184
|
+
"lat_min": 10,
|
1167
1185
|
"lat_max": 45,
|
1168
1186
|
"workers": 1,
|
1169
1187
|
"overwrite": False,
|
1170
1188
|
"depth": None,
|
1171
1189
|
"level": None,
|
1172
|
-
"validate_time":
|
1173
|
-
"idm_path": r
|
1174
|
-
"interval_hours":
|
1190
|
+
"validate_time": None,
|
1191
|
+
# "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
|
1192
|
+
"interval_hours": 24,
|
1175
1193
|
}
|
1176
1194
|
|
1177
1195
|
if single_var:
|
oafuncs/oa_down/test_ua.py
CHANGED
@@ -1,151 +1,40 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding=utf-8
|
3
|
-
"""
|
4
|
-
Author: Liu Kun && 16031215@qq.com
|
5
|
-
Date: 2024-12-01 19:32:25
|
6
|
-
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-12-10 11:16:36
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\test.py
|
9
|
-
Description:
|
10
|
-
EditPlatform: vscode
|
11
|
-
ComputerInfo: XPS 15 9510
|
12
|
-
SystemInfo: Windows 11
|
13
|
-
Python Version: 3.12
|
14
|
-
"""
|
15
|
-
|
16
|
-
import os
|
17
|
-
import random
|
18
1
|
import re
|
19
2
|
|
20
3
|
|
21
|
-
def
|
22
|
-
#
|
23
|
-
# 这个正则表达式检查User Agent是否包含常见的浏览器信息格式
|
4
|
+
def is_valid_ua(ua):
|
5
|
+
# 更宽松的 UA 验证规则
|
24
6
|
pattern = re.compile(
|
25
|
-
r"
|
26
|
-
|
27
|
-
|
28
|
-
|
7
|
+
r"""
|
8
|
+
^Mozilla/(4\.0|5\.0) # 必须以 Mozilla/4.0 或 5.0 开头
|
9
|
+
\s+ # 空格
|
10
|
+
\(.*?\) # 操作系统信息
|
11
|
+
\s+ # 空格
|
12
|
+
(AppleWebKit/|Gecko/|Trident/|Version/|Edge/)? # 浏览器引擎或版本标识(可选)
|
13
|
+
\d+(\.\d+)* # 至少一个版本号(小数部分可选)
|
14
|
+
.* # 允许后续扩展信息
|
15
|
+
$ # 行尾
|
16
|
+
""",
|
17
|
+
re.VERBOSE,
|
29
18
|
)
|
19
|
+
return re.match(pattern, ua.strip()) is not None
|
30
20
|
|
31
|
-
# 使用正则表达式匹配User Agent字符串
|
32
|
-
if pattern.match(user_agent):
|
33
|
-
return True
|
34
|
-
else:
|
35
|
-
return False
|
36
|
-
|
37
|
-
|
38
|
-
def get_ua():
|
39
|
-
current_dir = os.path.dirname(os.path.abspath(__file__))
|
40
|
-
ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
|
41
|
-
|
42
|
-
with open(ua_file_txt, "r") as f:
|
43
|
-
ua_list = f.readlines()
|
44
|
-
# 去掉换行符和空行
|
45
|
-
ua_list = [line.strip() for line in ua_list if line.strip()]
|
46
21
|
|
47
|
-
|
22
|
+
def main():
|
23
|
+
input_file = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list-old.txt"
|
24
|
+
output_file = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list.txt"
|
48
25
|
|
26
|
+
valid_uas = []
|
27
|
+
with open(input_file, "r", encoding="utf-8") as f:
|
28
|
+
for line in f:
|
29
|
+
line = line.strip()
|
30
|
+
if line and is_valid_ua(line):
|
31
|
+
valid_uas.append(line)
|
49
32
|
|
50
|
-
|
51
|
-
|
52
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
|
53
|
-
"Opera/8.0 (Windows NT 5.1; U; en)",
|
54
|
-
"Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
|
55
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
|
56
|
-
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
|
57
|
-
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
|
58
|
-
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
59
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
60
|
-
"Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
|
61
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
62
|
-
"Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
63
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
|
64
|
-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
|
65
|
-
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
66
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
67
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
68
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
69
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
|
70
|
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
|
71
|
-
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
72
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
73
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
|
74
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
|
75
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
|
76
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
|
77
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
|
78
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
|
79
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
|
80
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
|
81
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
|
82
|
-
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
|
83
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
|
84
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
|
85
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
|
86
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
87
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
|
88
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
|
89
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
90
|
-
"Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
91
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
|
92
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
93
|
-
"Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
94
|
-
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
95
|
-
"MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
96
|
-
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
97
|
-
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
|
98
|
-
"Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
|
99
|
-
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
|
100
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
|
101
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
|
102
|
-
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
|
103
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
|
104
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
105
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
|
106
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
|
107
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
|
108
|
-
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
109
|
-
"Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
|
110
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
|
111
|
-
"UCWEB7.0.2.37/28/999",
|
112
|
-
"NOKIA5700/UCWEB7.0.2.37/28/999",
|
113
|
-
"Openwave/UCWEB7.0.2.37/28/999",
|
114
|
-
"Openwave/UCWEB7.0.2.37/28/999",
|
115
|
-
]
|
116
|
-
with open(newtxtfile, "w") as f:
|
117
|
-
for line in ua_list:
|
118
|
-
f.write(line + "\n")
|
119
|
-
# print(f'Using User-Agent: {ua}')
|
120
|
-
ua = random.choice(ua_list)
|
121
|
-
return ua
|
33
|
+
with open(output_file, "w", encoding="utf-8") as f:
|
34
|
+
f.write("\n".join(valid_uas))
|
122
35
|
|
36
|
+
print(f"[Linux 兼容模式] 有效UA已保存到 {output_file},共 {len(valid_uas)} 条")
|
123
37
|
|
124
|
-
# get_ua_org()
|
125
38
|
|
126
39
|
if __name__ == "__main__":
|
127
|
-
|
128
|
-
|
129
|
-
with open(txtfile, "r") as f:
|
130
|
-
lines = f.readlines()
|
131
|
-
# 去掉换行符和空行
|
132
|
-
lines = [line.strip() for line in lines if line.strip()]
|
133
|
-
""" new_line = []
|
134
|
-
for i in range(len(lines)):
|
135
|
-
if '/' in lines[i]:
|
136
|
-
new_line.append(lines[i])
|
137
|
-
else:
|
138
|
-
print(lines[i]) """
|
139
|
-
|
140
|
-
new_line = []
|
141
|
-
for line in lines:
|
142
|
-
if is_valid_user_agent(line):
|
143
|
-
# print(line)
|
144
|
-
new_line.append(line)
|
145
|
-
else:
|
146
|
-
print(f"Invalid User-Agent: {line}")
|
147
|
-
|
148
|
-
newtxtfile = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\ua_list_new.txt"
|
149
|
-
with open(newtxtfile, "w") as f:
|
150
|
-
for line in new_line:
|
151
|
-
f.write(line + "\n")
|
40
|
+
main()
|