cloudcheck 8.4.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cloudcheck might be problematic; see the release details below for more information.

cloudcheck/__init__.py ADDED
@@ -0,0 +1,3 @@
1
from .cloudcheck import CloudCheck

# Explicitly declare the package's public API.
__all__ = ["CloudCheck"]
cloudcheck/helpers.py ADDED
@@ -0,0 +1,211 @@
1
+ import ipaddress
2
+ import os
3
+ import requests
4
+ from typing import List, Union
5
+
6
+
7
def defrag_cidrs(
    cidrs: List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]],
) -> List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]]:
    """
    Defragment a list of CIDR blocks by merging adjacent networks.

    Algorithm:
    1. Sort by prefix length, then by network address
    2. Walk the sorted list once, merging any two neighbors of the same
       version and prefix length that share the same /(-1) parent network
    3. Repeat until a full pass makes no merges

    Improvement over the previous version: a single linear merge pass is made
    per iteration (instead of re-sorting and O(n) list-popping after every
    individual merge), and the merge logic is self-contained.

    Args:
        cidrs: List of IPv4 or IPv6 network objects

    Returns:
        List of defragmented network objects, sorted by (prefixlen, address)
    """
    if not cidrs:
        return []

    # Deduplicate up front
    networks = list(set(cidrs))

    merged_any = True
    while merged_any:
        merged_any = False
        # Sorting by (prefixlen, address) places merge candidates side by side
        networks.sort(key=lambda n: (n.prefixlen, n.network_address.packed))
        collapsed = []
        i = 0
        while i < len(networks):
            current = networks[i]
            if i + 1 < len(networks):
                neighbor = networks[i + 1]
                # Two distinct same-version, same-size networks merge iff they
                # share the same parent one prefix-bit up (prefixlen 0 has no parent)
                if (
                    current.version == neighbor.version
                    and current != neighbor
                    and current.prefixlen == neighbor.prefixlen
                    and current.prefixlen > 0
                    and current.supernet(prefixlen_diff=1)
                    == neighbor.supernet(prefixlen_diff=1)
                ):
                    # The shared parent IS the merged network
                    collapsed.append(current.supernet(prefixlen_diff=1))
                    merged_any = True
                    i += 2
                    continue
            collapsed.append(current)
            i += 1
        networks = collapsed

    return networks
58
+
59
+
60
def _can_merge_networks(
    net1: Union[ipaddress.IPv4Network, ipaddress.IPv6Network],
    net2: Union[ipaddress.IPv4Network, ipaddress.IPv6Network],
) -> bool:
    """
    Return True if the two networks can be merged into one larger network.

    The networks qualify only when all of the following hold:
    - same IP version (both IPv4 or both IPv6)
    - not the identical network
    - same prefix length
    - adjacent (they share the same parent one prefix-bit up)
    """
    return (
        net1.version == net2.version
        and net1 != net2
        and net1.prefixlen == net2.prefixlen
        and _are_adjacent_networks(net1, net2)
    )
89
+
90
+
91
+ def _are_adjacent_networks(
92
+ net1: Union[ipaddress.IPv4Network, ipaddress.IPv6Network],
93
+ net2: Union[ipaddress.IPv4Network, ipaddress.IPv6Network],
94
+ ) -> bool:
95
+ """
96
+ Check if two networks are adjacent by creating two networks with sub-1 CIDR
97
+ and checking if they are equal.
98
+ """
99
+ # Must have same prefix length
100
+ if net1.prefixlen != net2.prefixlen:
101
+ return False
102
+
103
+ # Create two networks with sub-1 CIDR
104
+ new_prefixlen = net1.prefixlen - 1
105
+ if new_prefixlen < 0:
106
+ return False
107
+
108
+ # Create the two networks with the reduced prefix length using supernet
109
+ net1_parent = net1.supernet(prefixlen_diff=1)
110
+ net2_parent = net2.supernet(prefixlen_diff=1)
111
+
112
+ # If they are equal, the networks are adjacent
113
+ return net1_parent == net2_parent
114
+
115
+
116
+ def _merge_networks(
117
+ net1: Union[ipaddress.IPv4Network, ipaddress.IPv6Network],
118
+ net2: Union[ipaddress.IPv4Network, ipaddress.IPv6Network],
119
+ ) -> Union[ipaddress.IPv4Network, ipaddress.IPv6Network]:
120
+ """
121
+ Merge two adjacent networks into a larger network.
122
+ """
123
+ if net1 == net2:
124
+ raise ValueError("Networks must be different")
125
+
126
+ if not net1.version == net2.version:
127
+ raise ValueError("Networks must be the same version")
128
+
129
+ snet1 = net1.supernet(prefixlen_diff=1)
130
+ snet2 = net2.supernet(prefixlen_diff=1)
131
+ if not snet1 == snet2:
132
+ raise ValueError("Networks must be adjacent")
133
+
134
+ # Find the smaller network address
135
+ min_addr = min(net1.network_address, net2.network_address)
136
+
137
+ # Create the merged network with prefix length - 1
138
+ new_prefixlen = net1.prefixlen - 1
139
+ try:
140
+ return ipaddress.ip_network(f"{min_addr}/{new_prefixlen}")
141
+ except ValueError:
142
+ raise ValueError(
143
+ f"Failed to merge networks: {net1} (type: {type(net1)}) and {net2} (type: {type(net2)})"
144
+ )
145
+
146
+
147
def cidrs_to_strings(
    cidrs: List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]],
) -> List[str]:
    """
    Render network objects as their canonical CIDR string form.

    Args:
        cidrs: List of network objects

    Returns:
        List of CIDR strings, in the same order as the input
    """
    return list(map(str, cidrs))
160
+
161
+
162
def strings_to_cidrs(
    cidr_strings: List[str],
) -> List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]]:
    """
    Parse CIDR strings into network objects, silently dropping invalid entries.

    Parsing is non-strict, so host addresses like "10.0.0.5/24" are accepted
    and normalized to their containing network.

    Args:
        cidr_strings: List of CIDR strings

    Returns:
        List of network objects, in input order, minus any unparseable entries
    """
    parsed = []
    for entry in cidr_strings:
        try:
            network = ipaddress.ip_network(entry, strict=False)
        except ValueError:
            # Not a valid CIDR — skip it rather than fail the whole batch
            continue
        parsed.append(network)
    return parsed
182
+
183
+
184
# Request headers imitating Chrome 127 on Linux. Applied by request() when it
# is called with browser_headers=True; presumably used to avoid bot-blocking
# on provider IP-list endpoints — NOTE(review): confirm intent with authors.
browser_base_headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-US,en;q=0.9",
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "priority": "u=0, i",
    "referer": "https://www.google.com/",
    "sec-ch-ua": '"Chromium";v="127", "Not)A;Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Linux"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "cross-site",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
}
201
+
202
+
203
def request(url, include_api_key=False, browser_headers=False, **kwargs):
    """
    HTTP GET wrapper used by all providers.

    Args:
        url: URL to fetch.
        include_api_key: If True and BBOT_IO_API_KEY is set in the environment,
            send it as a Bearer token in the Authorization header.
        browser_headers: If True, overlay browser_base_headers onto the request.
        **kwargs: Passed through to requests.get() (e.g. headers, timeout).

    Returns:
        The requests.Response object.
    """
    # BUGFIX: copy the caller-supplied headers dict instead of mutating it in
    # place — previously the caller's dict was modified as a side effect.
    headers = dict(kwargs.get("headers", {}))
    if browser_headers:
        headers.update(browser_base_headers)
    bbot_io_api_key = os.getenv("BBOT_IO_API_KEY")
    if include_api_key and bbot_io_api_key:
        headers["Authorization"] = f"Bearer {bbot_io_api_key}"
    kwargs["headers"] = headers
    return requests.get(url, **kwargs)
@@ -0,0 +1,33 @@
1
+ import io
2
+ import zipfile
3
+ from cloudcheck.providers.base import BaseProvider
4
+ from typing import List
5
+
6
+
7
class Akamai(BaseProvider):
    """Akamai provider: CDN/cloud ranges from Akamai's published CIDR zip."""

    v2fly_company: str = "akamai"
    tags: List[str] = ["cloud"]
    # {"org_id": "AKAMAI-ARIN", "org_name": "Akamai Technologies, Inc.", "country": "US", "asns": [12222,16625,16702,17204,17334,18680,18717,20189,22207,22452,23454,23455,26008,30675,31984,32787,33047,35993,35994,36029,36183,393234,393560]}
    # {"org_id": "ORG-AT1-RIPE", "org_name": "Akamai International B.V.", "country": "NL", "asns": [20940,21342,21357,21399,31107,31108,31109,31110,31377,33905,34164,34850,35204,39836,43639,48163,49249,49846,200005,213120]}
    # {"org_id": "ORG-ATI1-AP-APNIC", "org_name": "Akamai Technologies, Inc.", "country": "US", "asns": [23903,24319,45757,55409,55770,63949,133103]}
    org_ids: List[str] = [
        "AKAMAI-ARIN",
        "ORG-AT1-RIPE",
        "ORG-ATI1-AP-APNIC",
    ]

    _ips_url = "https://techdocs.akamai.com/property-manager/pdfs/akamai_ipv4_ipv6_CIDRs-txt.zip"

    def fetch_cidrs(self):
        """Download Akamai's CIDR zip and return every non-empty line from
        both the IPv4 and IPv6 text files inside it."""
        response = self.request(self._ips_url)
        payload = getattr(response, "content", b"")
        cidrs = set()
        # The zip is small enough to unpack entirely in memory
        with zipfile.ZipFile(io.BytesIO(payload)) as archive:
            for member in ("akamai_ipv4_CIDRs.txt", "akamai_ipv6_CIDRs.txt"):
                with archive.open(member) as handle:
                    for raw in handle.read().splitlines():
                        entry = raw.decode(errors="ignore").strip()
                        if entry:
                            cidrs.add(entry)
        return list(cidrs)
@@ -0,0 +1,11 @@
1
+ from cloudcheck.providers.base import BaseProvider
2
+ from typing import List
3
+
4
+
5
class Alibaba(BaseProvider):
    """Alibaba Cloud provider.

    Static configuration only: domains come from the v2fly "alibaba" list and
    CIDRs/ASNs are resolved from the APNIC org id below by the base class.
    """

    v2fly_company: str = "alibaba"
    tags: List[str] = ["cloud"]
    # {"org_id": "ORG-ASEP1-AP-APNIC", "org_name": "Alibaba Cloud (Singapore) Private Limited", "country": "SG", "asns": [134963]}
    org_ids: List[str] = [
        "ORG-ASEP1-AP-APNIC",
    ]
@@ -0,0 +1,37 @@
1
+ from cloudcheck.providers.base import BaseProvider
2
+ from typing import List, Dict
3
+
4
+
5
class Amazon(BaseProvider):
    """Amazon / AWS provider: ranges from the official ip-ranges.json feed
    plus ASNs resolved from the org ids below."""

    v2fly_company: str = "amazon"
    org_ids: List[str] = [
        "AMAZO-139-ARIN",  # Amazon.com, Inc., US
        "AMAZO-141-ARIN",  # Amazon Technologies, Inc., US
        "AMAZO-22-ARIN",  # Amazon Web Services, Inc., US
        "AMAZO-4-ARIN",  # Amazon.com, Inc., US
        "AMAZON-4-ARIN",  # Amazon.com, Inc., US
        "ARL-76-ARIN",  # Amazon Robotics LLC, US
        "ASL-830-ARIN",  # Amazon.com Services, LLC, US
        "AT-9049-ARIN",  # Amazon Technologies Inc., US
        "AT-9066-ARIN",  # Amazon Technologies Inc., US
        "ORG-AARP1-AP-APNIC",  # Amazon Asia-Pacific Resources Private Limited, SG
        "ORG-ACSP2-AP-APNIC",  # Amazon Corporate Services Pty Ltd, AU
        "ORG-ACTS1-AP-APNIC",  # Amazon Connection Technology Services (Beijing) Co., LTD, CN
        "ORG-ADSI1-RIPE",  # Amazon Data Services Ireland Ltd, IE
        "ORG-ADSJ1-AP-APNIC",  # Amazon Data Services Japan KK, JP
        "ORG-AI2-AP-APNIC",  # Amazon.com, Inc., US
    ]
    tags: List[str] = ["cloud"]
    _bucket_name_regex = r"[a-z0-9_][a-z0-9-\.]{1,61}[a-z0-9]"
    regexes: Dict[str, List[str]] = {
        "STORAGE_BUCKET_NAME": [_bucket_name_regex],
        "STORAGE_BUCKET_HOSTNAME": [
            r"(" + _bucket_name_regex + r")\.(s3-?(?:[a-z0-9-]*\.){1,2}amazonaws\.com)"
        ],
    }

    _ips_url = "https://ip-ranges.amazonaws.com/ip-ranges.json"

    def fetch_cidrs(self):
        """Fetch AWS's published IP ranges.

        BUGFIX/generalization: previously only the IPv4 "prefixes" section of
        ip-ranges.json was read; the feed's "ipv6_prefixes" section (documented
        alongside it) is now included as well, so AWS IPv6 ranges are detected.
        """
        data = self.request(self._ips_url).json()
        cidrs = set(p["ip_prefix"] for p in data["prefixes"])
        # .get() keeps us tolerant of a feed without the IPv6 section
        cidrs.update(p["ipv6_prefix"] for p in data.get("ipv6_prefixes", []))
        return list(cidrs)
@@ -0,0 +1,20 @@
1
+ from cloudcheck.providers.base import BaseProvider
2
+ from typing import List
3
+
4
+
5
class Arvancloud(BaseProvider):
    """ArvanCloud CDN provider: ranges from its published plain-text IP list."""

    domains: List[str] = ["arvancloud.ir"]
    tags: List[str] = ["cdn"]
    # {"org_id": "ORG-AGTL2-RIPE", "org_name": "ARVANCLOUD GLOBAL TECHNOLOGIES L.L.C", "country": "AE", "asns": [57568,208006,210296]}
    org_ids: List[str] = [
        "ORG-AGTL2-RIPE",
    ]

    _ips_url = "https://www.arvancloud.ir/en/ips.txt"

    def fetch_cidrs(self):
        """Fetch ArvanCloud's IP list (one CIDR per line).

        BUGFIX: blank lines and surrounding whitespace from the raw text were
        previously added to the range set verbatim; they are now stripped and
        filtered out.
        """
        response = self.request(self._ips_url)
        ranges = set()
        if getattr(response, "status_code", 0) == 200:
            ranges.update(
                line.strip() for line in response.text.splitlines() if line.strip()
            )
        return list(ranges)
@@ -0,0 +1,37 @@
1
+ from cloudcheck.providers.base import BaseProvider
2
+ from typing import List, Dict
3
+
4
+
5
class Azure(BaseProvider):
    """Microsoft Azure provider: ranges from a Microsoft public-IPs XML
    download plus ASNs resolved from the Microsoft org ids below."""

    v2fly_company: str = "azure"
    tags: List[str] = ["cloud"]
    # {"org_id": "MSFT-ARIN", "org_name": "Microsoft Corporation", "country": "US", "asns": [3598,5761,6182,6194,6291,6584,8068,8069,8070,8071,8072,8073,8074,8075,12076,13399,13811,14719,14783,17144,17345,20046,22692,23468,25796,26222,30135,30520,30575,31792,32476,36006,40066,46182,54396,63245,63314,395496,395524,395851,396463,397096,397466,397996,398575,398656,398657,398658,398659,398660,398661,398961,400572,400573,400574,400575,400576,400577,400578,400579,400580,400581,400582,400884]}
    # {"org_id": "ORG-MA42-RIPE", "org_name": "Microsoft Limited", "country": "GB", "asns": [35106]}
    # {"org_id": "ORG-MDMG3-RIPE", "org_name": "Microsoft Deutschland MCIO GmbH", "country": "DE", "asns": [200517]}
    # {"org_id": "ORG-MOPL2-AP-APNIC", "org_name": "Microsoft Operations PTE Ltd", "country": "SG", "asns": [132348]}
    # {"org_id": "ORG-MSPL4-AP-APNIC", "org_name": "Microsoft Singapore Pte. Ltd.", "country": "US", "asns": [45139]}
    org_ids: List[str] = [
        "MSFT-ARIN",
        "ORG-MA42-RIPE",
        "ORG-MDMG3-RIPE",
        "ORG-MOPL2-AP-APNIC",
        "ORG-MSPL4-AP-APNIC",
    ]
    _bucket_name_regex = r"[a-z0-9][a-z0-9-_\.]{1,61}[a-z0-9]"
    regexes: Dict[str, List[str]] = {
        "STORAGE_BUCKET_NAME": [_bucket_name_regex],
        "STORAGE_BUCKET_HOSTNAME": [
            r"(" + _bucket_name_regex + r")\.(blob\.core\.windows\.net)"
        ],
    }

    # NOTE(review): this URL is a dated static snapshot (2020-08-24) — confirm
    # whether a fresher Microsoft source should replace it.
    _ips_url = "https://download.microsoft.com/download/0/1/8/018E208D-54F8-44CD-AA26-CD7BC9524A8C/PublicIPs_20200824.xml"

    def fetch_cidrs(self):
        """Scrape subnet values out of the public-IPs XML by pulling the first
        quoted attribute from every line containing an IpRange Subnet tag."""
        response = self.request(self._ips_url)
        subnets = set()
        for row in response.text.splitlines():
            if "IpRange Subnet" in row:
                subnets.add(row.split('"')[1])
        return list(subnets)
@@ -0,0 +1,11 @@
1
+ from cloudcheck.providers.base import BaseProvider
2
+ from typing import List
3
+
4
+
5
class Backblaze(BaseProvider):
    """Backblaze cloud-storage provider.

    Static configuration only: CIDRs/ASNs are resolved from the ARIN org id
    below by the base class.
    """

    tags: List[str] = ["cloud"]
    # {"org_id": "BACKB-7-ARIN", "org_name": "Backblaze Inc", "country": "US", "asns": [40401,396865]}
    org_ids: List[str] = [
        "BACKB-7-ARIN",
    ]
    domains: List[str] = ["backblaze.com", "backblazeb2.com"]
@@ -0,0 +1,303 @@
1
+ import ipaddress
2
+ import os
3
+ import traceback
4
+ import subprocess
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Dict, List, Union, Set
8
+ from pydantic import BaseModel, field_validator, computed_field
9
+
10
+ from ..helpers import defrag_cidrs, request
11
+
12
+
13
# Module-level flag: True once the v2fly repo has been cloned/refreshed in this
# process, so multiple providers don't each hit the network.
v2fly_repo_pulled = False
14
+
15
+
16
class BaseProvider(BaseModel):
    """
    Base class for all cloud providers.

    Each provider inherits from this class and overrides any of the default values.
    They can also override the update_cidrs() method to fetch cidrs from a different source.
    """

    # these values are static and always loaded from the class definition
    regexes: Dict[str, List[str]] = {}
    tags: List[str] = []  # Tags for the provider (e.g. "cdn", "waf", etc.)
    org_ids: List[str] = []  # ASN Organization IDs (e.g. GOGL-ARIN)
    v2fly_company: str = ""  # Company name for v2fly domain fetching

    # these values are dynamic and set by the update() method
    # NOTE: default is evaluated once at class-definition time; update_cidrs()
    # refreshes it after every successful update
    last_updated: float = time.time()

    # these we allow static values but they are later merged with dynamic values
    asns: List[int] = []
    cidrs: List[str] = []
    domains: List[str] = []

    @field_validator("cidrs")
    @classmethod
    def validate_cidrs(cls, value):
        """Parse CIDR strings (skipping invalid ones), defragment adjacent
        blocks, and return the result as sorted strings."""
        ips = []
        for v in value:
            try:
                ips.append(ipaddress.ip_network(v, strict=False))
            except ValueError:
                print(f"Invalid CIDR: from {cls.__name__}: {v}")
                continue
        ips = [str(ip) for ip in defrag_cidrs(ips)]
        return sorted(ips)

    @field_validator("domains")
    @classmethod
    def validate_domains(cls, value):
        """Lowercase, strip trailing dots, deduplicate, and sort domains."""
        return sorted(list(set([d.lower().strip(".") for d in value])))

    @computed_field(return_type=str)
    @property
    def name(self):
        """Provider name, derived from the subclass name."""
        return self.__class__.__name__

    def __init__(self, **data):
        super().__init__(**data)
        print(f"Initializing {self.name}")
        self._cidrs = []
        self._cache_dir = Path.home() / ".cache" / "cloudcheck"
        self._repo_url = "https://github.com/v2fly/domain-list-community.git"
        self._asndb_url = os.getenv("ASNDB_URL", "https://asndb.api.bbot.io/v1")
        self._bbot_io_api_key = os.getenv("BBOT_IO_API_KEY")

    def update(self):
        """Refresh both domains and CIDRs; returns a list of error strings."""
        print(f"Updating {self.name}")
        errors = []
        errors.extend(self.update_domains())
        errors.extend(self.update_cidrs())
        return errors

    def update_domains(self):
        """Merge dynamically-fetched v2fly domains into self.domains.

        Returns a list of error strings (empty on success)."""
        errors = []
        if self.v2fly_company:
            domains, errors = self.fetch_v2fly_domains()
            if domains:
                self.domains = sorted(list(set(self.domains + domains)))
            else:
                errors.append(
                    f"No v2fly domains were found for {self.name} (company name: {self.v2fly_company})"
                )
        return errors

    def update_cidrs(self):
        """Collect CIDRs from direct ASNs, org ids, dynamic provider sources,
        and static config, then validate/defragment into self.cidrs.

        Returns a list of error strings (empty on success)."""
        cidrs = set()
        errors = []

        # query by direct ASNs
        if self.asns:
            _cidrs, _errors = self.fetch_asns()
            print(f"Got {len(_cidrs)} ASN cidrs for {self.name}'s ASNs {self.asns}")
            if not _cidrs:
                errors.append(
                    f"No ASN cidrs were found for {self.name}'s ASNs {self.asns}"
                )
            errors.extend(_errors)
            cidrs.update(_cidrs)

        # query by org IDs
        if self.org_ids:
            _cidrs, _asns, _errors = self.fetch_org_ids()
            _asns = _asns.copy()
            _asns.update(self.asns)
            self.asns = list(sorted(_asns))
            print(
                f"Got {len(_cidrs)} org id cidrs for {self.name}'s org ids {self.org_ids}"
            )
            if not _cidrs:
                errors.append(
                    f"No cidrs were found for {self.name}'s org ids {self.org_ids}"
                )
            errors.extend(_errors)
            cidrs.update(_cidrs)

        # fetch any dynamically-updated lists of CIDRs
        try:
            dynamic_cidrs = self.fetch_cidrs()
            print(f"Got {len(dynamic_cidrs)} dynamic cidrs for {self.name}")
            cidrs.update(dynamic_cidrs)
        except Exception as e:
            errors.append(
                f"Failed to fetch dynamic cidrs for {self.name}: {e}:\n{traceback.format_exc()}"
            )

        # finally, put in any manually-specified CIDRs
        print(f"Adding {len(self.cidrs)} manually-specified cidrs for {self.name}")
        if self.cidrs:
            cidrs.update(self.cidrs)

        print(f"Total {len(cidrs)} cidrs for {self.name}")

        try:
            self.cidrs = self.validate_cidrs(cidrs)
        except Exception as e:
            errors.append(
                f"Error validating ASN cidrs for {self.name}: {e}:\n{traceback.format_exc()}"
            )

        self.last_updated = time.time()

        return errors

    def fetch_org_ids(self):
        """Resolve each org id via ASNDB into its ASNs and their CIDRs.

        Returns (cidrs set, asns set, errors list)."""
        errors = []
        cidrs = set()
        print(f"Fetching {len(self.org_ids)} org ids for {self.name}")
        asns = set()
        for org_id in self.org_ids:
            print(f"Fetching cidrs for {org_id} from asndb")
            try:
                url = f"{self._asndb_url}/org/{org_id}"
                print(f"Fetching {url}")
                res = self.request(url, include_api_key=True)
                print(f"{url} -> {res}: {res.text}")
                j = res.json()
            except Exception as e:
                errors.append(
                    f"Failed to fetch cidrs for {org_id} from asndb: {e}:\n{traceback.format_exc()}"
                )
                continue
            _asns = j.get("asns", [])
            for asn in _asns:
                asns.add(asn)
                asn_cidrs, _errors = self.fetch_asn(asn)
                errors.extend(_errors)
                cidrs.update(asn_cidrs)
        return cidrs, asns, errors

    def fetch_asns(self):
        """Fetch CIDRs for every ASN in self.asns from ASNDB.

        Returns (cidrs set, errors list)."""
        # BUGFIX: this accumulator was a list, but .update() was called on it
        # below, raising AttributeError whenever self.asns was non-empty;
        # a set also deduplicates CIDRs across ASNs.
        cidrs = set()
        errors = []
        print(f"Fetching {len(self.asns)} ASNs for {self.name}")
        for asn in self.asns:
            asn_cidrs, _errors = self.fetch_asn(asn)
            errors.extend(_errors)
            cidrs.update(asn_cidrs)
        return cidrs, errors

    def fetch_asn(self, asn: int):
        """Fetch CIDRs for a single ASN from ASNDB.

        Returns (cidrs list, errors list)."""
        cidrs = []
        errors = []
        url = f"{self._asndb_url}/asn/{asn}"
        print(f"Fetching {url}")
        try:
            res = self.request(url, include_api_key=True)
            print(f"{url} -> {res.text}")
            j = res.json()
            cidrs = j.get("subnets", [])
        except Exception as e:
            errors.append(
                f"Failed to fetch cidrs for {asn} from asndb: {e}:\n{traceback.format_exc()}"
            )
        print(f"Got {len(cidrs)} cidrs for {asn}")
        return cidrs, errors

    def fetch_v2fly_domains(self):
        """Fetch domains from the v2fly community repository.

        Returns (sorted domains list, errors list)."""
        if not self.v2fly_company:
            return [], []

        errors = []
        # BUGFIX: "domains" was previously unbound if parsing raised, causing a
        # NameError in the return statement below.
        domains = set()
        repo_path, repo_errors = self._ensure_v2fly_repo_cached()
        # BUGFIX: clone/pull errors were previously discarded (unpacked into an
        # unused "_success" variable); surface them to the caller instead.
        errors.extend(repo_errors)
        company_file = repo_path / "data" / self.v2fly_company
        try:
            domains = self._parse_v2fly_domain_file(company_file)
        except Exception as e:
            errors.append(
                f"Failed to parse {self.v2fly_company} domains: {e}:\n{traceback.format_exc()}"
            )
        return sorted(list(domains)), errors

    def fetch_cidrs(self) -> List[str]:
        """Fetch CIDRs from a custom source (overridden by subclasses)."""
        return []

    def fetch_domains(self) -> List[str]:
        """Fetch domains from a custom source (overridden by subclasses)."""
        return []

    def _ensure_v2fly_repo_cached(self):
        """Clone the community repo if absent, otherwise pull it once per
        process. Returns (repo path, errors list)."""
        global v2fly_repo_pulled
        errors = []
        repo_dir = self._cache_dir / "domain-list-community"
        if not repo_dir.exists():
            self._cache_dir.mkdir(parents=True, exist_ok=True)
            try:
                subprocess.run(
                    ["git", "clone", "--depth", "1", self._repo_url, str(repo_dir)],
                    check=True,
                    capture_output=True,
                )
                v2fly_repo_pulled = True
            except subprocess.CalledProcessError as e:
                errors.append(
                    f"Failed to clone v2fly repo: {e}:\n{traceback.format_exc()}"
                )
        elif not v2fly_repo_pulled:
            try:
                subprocess.run(
                    ["git", "pull"], cwd=repo_dir, check=True, capture_output=True
                )
                # BUGFIX: the flag was never set after a successful pull, so
                # every provider re-pulled the repo in the same process.
                v2fly_repo_pulled = True
            except subprocess.CalledProcessError as e:
                errors.append(
                    f"Failed to pull v2fly repo: {e}:\n{traceback.format_exc()}"
                )
        return repo_dir, errors

    def _parse_v2fly_domain_file(self, file_path: Path) -> Set[str]:
        """Parse a v2fly domain-list file, recursively following include:
        lines, and return the set of lowercase domains."""
        print(f"Parsing {file_path}")
        domains = set()
        if not file_path.exists():
            print(f"File {file_path} does not exist")
            return domains

        with open(file_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                if line.startswith("include:"):
                    include_file = line[8:]
                    include_path = file_path.parent / include_file
                    domains.update(self._parse_v2fly_domain_file(include_path))
                    continue

                if line.startswith("domain:"):
                    domain = line[7:]
                elif line.startswith("full:"):
                    domain = line[5:]
                elif line.startswith("keyword:") or line.startswith("regexp:"):
                    # keyword/regexp entries are patterns, not literal domains
                    continue
                else:
                    domain = line

                # strip any trailing "@attribute" annotation from the entry
                domain = domain.split("@")[0].strip()
                if domain:
                    domains.add(domain.lower())
        return domains

    def request(self, *args, **kwargs):
        """HTTP GET via the shared helpers.request wrapper."""
        return request(*args, **kwargs)

    def __str__(self):
        return self.name

    def __repr__(self):
        return f"{self.__class__.__name__}(name='{self.name}')"