py-cloudip 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloudip/__init__.py ADDED
@@ -0,0 +1,166 @@
1
+ """py-cloudip — fast cloud-provider IP detection.
2
+
3
+ Detect whether an IP address belongs to AWS, GCP, Azure, Cloudflare,
4
+ DigitalOcean, or Oracle Cloud. Data comes from the rezmoss/cloudip-db database
5
+ (network fetch with SHA-256 verification, on-disk cache, and an embedded
6
+ offline fallback).
7
+
8
+ Quick start::
9
+
10
+ import cloudip
11
+ cloudip.is_aws("52.94.76.1") # True
12
+ cloudip.get_provider("34.64.0.1") # "gcp"
13
+ cloudip.lookup("52.94.76.1") # LookupResult(found=True, provider="aws", ...)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import threading
19
+ from typing import List, Optional, Union
20
+
21
+ from .constants import (
22
+ PROVIDER_AWS,
23
+ PROVIDER_AZURE,
24
+ PROVIDER_CLOUDFLARE,
25
+ PROVIDER_DIGITALOCEAN,
26
+ PROVIDER_GCP,
27
+ PROVIDER_ORACLE,
28
+ )
29
+ from .detector import Detector, load_version, new_detector
30
+ from .types import (
31
+ CheckUpdateResult,
32
+ Database,
33
+ IPEntry,
34
+ LookupResult,
35
+ Provider,
36
+ Range,
37
+ VersionInfo,
38
+ )
39
+
40
+ __version__ = "0.1.0"
41
+
42
+ __all__ = [
43
+ "Detector",
44
+ "new_detector",
45
+ "lookup",
46
+ "get_provider",
47
+ "is_cloud_provider",
48
+ "is_aws",
49
+ "is_gcp",
50
+ "is_azure",
51
+ "is_cloudflare",
52
+ "is_digitalocean",
53
+ "is_oracle",
54
+ "get_ips",
55
+ "version",
56
+ "range_count",
57
+ "providers",
58
+ "update",
59
+ "check_update",
60
+ "clear_cache",
61
+ "remote_version",
62
+ "CheckUpdateResult",
63
+ "Database",
64
+ "IPEntry",
65
+ "LookupResult",
66
+ "Provider",
67
+ "Range",
68
+ "VersionInfo",
69
+ "PROVIDER_AWS",
70
+ "PROVIDER_GCP",
71
+ "PROVIDER_AZURE",
72
+ "PROVIDER_CLOUDFLARE",
73
+ "PROVIDER_DIGITALOCEAN",
74
+ "PROVIDER_ORACLE",
75
+ ]
76
+
77
+ _default: Optional[Detector] = None
78
+ _default_lock = threading.Lock()
79
+
80
+
81
def _get_default() -> Detector:
    """Return the shared module-level detector, building it on first use.

    The detector is created with ``new_detector()`` while ``_default_lock`` is
    held; once it exists, callers receive it without taking the lock.
    """
    global _default
    detector = _default
    if detector is None:
        with _default_lock:
            # Re-check under the lock: another thread may have built it already.
            if _default is None:
                _default = new_detector()
            detector = _default
    return detector
89
+
90
+
91
def lookup(ip: str) -> LookupResult:
    """Reverse-look-up ``ip`` using the shared default detector."""
    detector = _get_default()
    return detector.lookup(ip)
93
+
94
+
95
def get_provider(ip: str) -> Provider:
    """Return the provider name for ``ip`` as reported by the default detector."""
    detector = _get_default()
    return detector.get_provider(ip)
97
+
98
+
99
def is_cloud_provider(ip: str) -> bool:
    """Return whether the default detector finds ``ip`` in any known range."""
    detector = _get_default()
    return detector.is_cloud_provider(ip)
101
+
102
+
103
def is_aws(ip: str) -> bool:
    """Return whether the default detector attributes ``ip`` to AWS."""
    detector = _get_default()
    return detector.is_aws(ip)
105
+
106
+
107
def is_gcp(ip: str) -> bool:
    """Return whether the default detector attributes ``ip`` to GCP."""
    detector = _get_default()
    return detector.is_gcp(ip)
109
+
110
+
111
def is_azure(ip: str) -> bool:
    """Return whether the default detector attributes ``ip`` to Azure."""
    detector = _get_default()
    return detector.is_azure(ip)
113
+
114
+
115
def is_cloudflare(ip: str) -> bool:
    """Return whether the default detector attributes ``ip`` to Cloudflare."""
    detector = _get_default()
    return detector.is_cloudflare(ip)
117
+
118
+
119
def is_digitalocean(ip: str) -> bool:
    """Return whether the default detector attributes ``ip`` to DigitalOcean."""
    detector = _get_default()
    return detector.is_digitalocean(ip)
121
+
122
+
123
def is_oracle(ip: str) -> bool:
    """Return whether the default detector attributes ``ip`` to Oracle Cloud."""
    detector = _get_default()
    return detector.is_oracle(ip)
125
+
126
+
127
def get_ips(
    providers: Optional[Union[Provider, List[Provider]]] = None
) -> List[IPEntry]:
    """Return the default detector's entries for ``providers``.

    ``providers`` is passed through unchanged: a single name, a list of names,
    or ``None`` to request every provider.
    """
    detector = _get_default()
    return detector.get_ips(providers)
131
+
132
+
133
def version() -> str:
    """Return the data version held by the default detector."""
    detector = _get_default()
    return detector.version()
135
+
136
+
137
def range_count() -> int:
    """Return how many ranges the default detector has loaded."""
    detector = _get_default()
    return detector.range_count()
139
+
140
+
141
def providers() -> List[Provider]:
    """Return the provider names known to the default detector."""
    detector = _get_default()
    return detector.providers()
143
+
144
+
145
def update() -> None:
    """Ask the default detector to refresh its data."""
    detector = _get_default()
    detector.update()
147
+
148
+
149
def check_update() -> CheckUpdateResult:
    """Return the default detector's update-check result."""
    detector = _get_default()
    return detector.check_update()
151
+
152
+
153
def remote_version(version_url: Optional[str] = None) -> VersionInfo:
    """Fetch remote version metadata via ``load_version``.

    When ``version_url`` is ``None`` the package's ``DEFAULT_VERSION_URL`` is
    used; otherwise the given URL is passed through unchanged.
    """
    if version_url is not None:
        return load_version(version_url)
    # Imported lazily, only when the default is actually needed.
    from .constants import DEFAULT_VERSION_URL

    return load_version(DEFAULT_VERSION_URL)
159
+
160
+
161
def clear_cache() -> None:
    """Delete the on-disk cache and drop the shared default detector.

    If a default detector exists, its cache directory is cleared, the detector
    is closed, and the module-level reference is reset so the next call builds
    a fresh one.

    If no default detector has been created yet (for example a fresh
    ``cloudip clear-cache`` process), the default cache directory is cleared
    directly. Previously this case was a silent no-op, so the CLI reported
    "cache cleared" without deleting anything.
    """
    global _default
    if _default is not None:
        _default.clear_cache()
        _default.close()
        _default = None
        return
    # Local import under an alias: this function shadows the name ``clear_cache``.
    from .cache import clear_cache as _clear_cache_dir

    # The empty string is the "use the default cache directory" sentinel.
    _clear_cache_dir("")
cloudip/__main__.py ADDED
@@ -0,0 +1,4 @@
1
from .cli import main

# Entry point for ``python -m cloudip``: run the CLI and exit with the
# integer status that ``main()`` returns.
if __name__ == "__main__":
    raise SystemExit(main())
cloudip/cache.py ADDED
@@ -0,0 +1,75 @@
1
+ """On-disk caching of the downloaded database under a user cache directory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import shutil
8
+ import time
9
+ from pathlib import Path
10
+ from typing import NamedTuple, Optional
11
+
12
+ _DATA_FILE = "cloudip.msgpack.gz"
13
+ _META_FILE = "version.json"
14
+
15
+
16
class CachedData(NamedTuple):
    """A cached database payload together with the version recorded for it."""

    version: str  # "version" value from the cache metadata file ("" if absent)
    bytes: bytes  # raw contents of the cached data file
19
+
20
+
21
+ def _default_cache_dir() -> str:
22
+ base = (
23
+ os.environ.get("XDG_CACHE_HOME")
24
+ or os.path.join(os.path.expanduser("~"), ".cache")
25
+ )
26
+ return os.path.join(base, "py-cloudip")
27
+
28
+
29
def resolve_cache_dir(dir: Optional[str]) -> Optional[str]:
    """Turn a cache-directory setting into a concrete path, or ``None``.

    ``None`` means caching is disabled and is returned unchanged. The empty
    string is the "use the default" sentinel and resolves to the default cache
    directory. Any other value is returned as given.
    """
    if dir is not None and dir == "":
        return _default_cache_dir()
    return dir
36
+
37
+
38
def read_cache(dir: str) -> Optional[CachedData]:
    """Read the cached database and its recorded version from ``dir``.

    Returns:
        A ``CachedData`` with the raw data-file bytes and the ``"version"``
        value from the metadata file (``""`` when the key is absent), or
        ``None`` when either file cannot be read or the metadata is not a JSON
        object.
    """
    try:
        root = Path(dir)
        data = (root / _DATA_FILE).read_bytes()
        meta = json.loads((root / _META_FILE).read_text("utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable files, bad UTF-8 and invalid JSON all mean "no cache".
        return None
    if not isinstance(meta, dict):
        # Valid JSON that is not an object (e.g. ``[]``) would make ``.get``
        # raise AttributeError; treat it as a corrupt cache instead.
        return None
    return CachedData(version=meta.get("version", ""), bytes=data)
46
+
47
+
48
def write_cache(dir: str, version: str, data: bytes) -> None:
    """Store ``data`` and a small metadata file in ``dir`` (best-effort).

    The metadata records ``version`` and the current time as ``stored_at``.
    Each file is written to a temporary sibling and then moved into place with
    ``os.replace``, so a concurrent reader does not see a half-written file.
    Any ``OSError`` is swallowed: failing to cache must not break the caller.
    """
    try:
        root = Path(dir)
        root.mkdir(parents=True, exist_ok=True)
        meta = json.dumps({"version": version, "stored_at": time.time()})
        # Encode explicitly: the metadata is read back as UTF-8.
        payloads = ((_DATA_FILE, data), (_META_FILE, meta.encode("utf-8")))
        for name, payload in payloads:
            # Per-process temp name so two writers do not share a temp file.
            tmp_path = root / f"{name}.{os.getpid()}.tmp"
            tmp_path.write_bytes(payload)
            os.replace(tmp_path, root / name)
    except OSError:
        pass  # best-effort
58
+
59
+
60
def cache_age_seconds(dir: str) -> Optional[float]:
    """Return how many seconds ago the cache in ``dir`` was stored.

    The age is computed from the ``stored_at`` value in the metadata file.

    Returns:
        The age in seconds, or ``None`` when the metadata file is missing,
        unreadable, not valid JSON, or has no usable ``stored_at`` value.
    """
    try:
        meta = json.loads((Path(dir) / _META_FILE).read_text("utf-8"))
        return time.time() - float(meta["stored_at"])
    except (OSError, ValueError, KeyError, TypeError):
        # TypeError covers non-object JSON (e.g. a list) and a null
        # ``stored_at``; both mean the age is unknown, not a crash.
        return None
66
+
67
+
68
def clear_cache(dir: Optional[str]) -> None:
    """Remove the cache directory tree, ignoring errors.

    ``None`` is treated like the empty-string sentinel, so it resolves to the
    default cache directory. Nothing happens if the resolved path is empty.
    """
    target = resolve_cache_dir("" if dir is None else dir)
    if target:
        try:
            shutil.rmtree(target, ignore_errors=True)
        except OSError:
            pass
cloudip/cli.py ADDED
@@ -0,0 +1,105 @@
1
+ """Command-line interface: ``cloudip <command> [args]``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sys
7
+ from typing import List
8
+
9
+ from . import (
10
+ check_update,
11
+ clear_cache,
12
+ get_ips,
13
+ get_provider,
14
+ lookup,
15
+ providers,
16
+ range_count,
17
+ update,
18
+ version,
19
+ )
20
+
21
+ HELP = """cloudip — cloud provider IP utilities (py-cloudip)
22
+
23
+ Usage:
24
+ cloudip lookup <ip> Reverse-lookup an IP address
25
+ cloudip get <provider>[,...] Print CIDRs for one or more providers
26
+ cloudip provider <ip> Print provider name for an IP
27
+ cloudip providers List supported providers
28
+ cloudip version Print local data version + range count
29
+ cloudip check-update Check if a newer upstream version exists
30
+ cloudip update Force a refresh from cloudip-db
31
+ cloudip clear-cache Delete the local cache
32
+ cloudip help Show this help
33
+
34
+ Data source: rezmoss/cloudip-db
35
+ """
36
+
37
+
38
def main(argv: List[str] = None) -> int:
    """Run the ``cloudip`` command-line interface and return an exit status.

    Args:
        argv: Command and arguments, without the program name. When ``None``,
            ``sys.argv[1:]`` is used.

    Returns:
        ``0`` on success. ``1`` when a lookup finds nothing, the provider is
        unknown, no update is available, or any exception is raised while
        running a command. ``2`` for an unrecognised command.
    """
    argv = sys.argv[1:] if argv is None else argv
    cmd = argv[0] if argv else None
    args = argv[1:]

    # No command, or an explicit help request: print usage and succeed.
    if cmd in (None, "help", "-h", "--help"):
        sys.stdout.write(HELP)
        return 0

    try:
        if cmd == "lookup":
            if not args:
                raise ValueError("usage: cloudip lookup <ip>")
            r = lookup(args[0])
            print(json.dumps(r.to_dict(), indent=2))
            return 0 if r.found else 1

        if cmd == "provider":
            if not args:
                raise ValueError("usage: cloudip provider <ip>")
            p = get_provider(args[0])
            if not p:
                print("unknown")
                return 1
            print(p)
            return 0

        if cmd == "get":
            if not args:
                raise ValueError("usage: cloudip get <provider>[,<provider>]")
            # Comma-separated provider list; blank items are dropped.
            want = [s.strip() for s in args[0].split(",") if s.strip()]
            for e in get_ips(want):
                print(e.ip_address)
            return 0

        if cmd == "providers":
            for p in providers():
                print(p)
            return 0

        if cmd == "version":
            print(f"{version()} ({range_count()} ranges)")
            return 0

        if cmd == "check-update":
            result = check_update()
            print(json.dumps(result.to_dict(), indent=2))
            # Exit status doubles as the answer: 0 means an update exists.
            return 0 if result.has_update else 1

        if cmd == "update":
            update()
            print("updated")
            return 0

        if cmd == "clear-cache":
            clear_cache()
            print("cache cleared")
            return 0

        sys.stderr.write(f"unknown command: {cmd}\n{HELP}")
        return 2
    except Exception as err:  # noqa: BLE001
        # Top-level boundary: report any failure (including the usage
        # ValueErrors raised above) on stderr instead of a traceback.
        sys.stderr.write(f"error: {err}\n")
        return 1
102
+
103
+
104
+ if __name__ == "__main__":
105
+ raise SystemExit(main())
cloudip/constants.py ADDED
@@ -0,0 +1,15 @@
1
+ """Shared constants: provider names, default URLs, and timing defaults."""
2
+
3
+ PROVIDER_AWS = "aws"
4
+ PROVIDER_GCP = "gcp"
5
+ PROVIDER_AZURE = "azure"
6
+ PROVIDER_CLOUDFLARE = "cloudflare"
7
+ PROVIDER_DIGITALOCEAN = "digitalocean"
8
+ PROVIDER_ORACLE = "oracle"
9
+
10
+ DEFAULT_BASE_URL = "https://raw.githubusercontent.com/rezmoss/cloudip-db/main/data"
11
+ DEFAULT_DATA_URL = f"{DEFAULT_BASE_URL}/cloudip.msgpack.gz"
12
+ DEFAULT_VERSION_URL = f"{DEFAULT_BASE_URL}/version.json"
13
+
14
+ HOUR_SECONDS = 60 * 60
15
+ DEFAULT_TTL_SECONDS = 24 * HOUR_SECONDS
Binary file
cloudip/decode.py ADDED
@@ -0,0 +1,44 @@
1
+ """Decompress + msgpack-decode the database, plus hashing helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import gzip
6
+ import hashlib
7
+
8
+ import msgpack
9
+
10
+ from .types import Database, Range
11
+
12
+
13
def gunzip(buf: bytes) -> bytes:
    """Return the gzip-decompressed contents of ``buf``."""
    inflated = gzip.decompress(buf)
    return inflated
15
+
16
+
17
def sha256_hex(buf: bytes) -> str:
    """Return the SHA-256 digest of ``buf`` as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(buf)
    return hasher.hexdigest()
19
+
20
+
21
def decode_database(gz_buf: bytes) -> Database:
    """Decode a gzipped MessagePack database into a :class:`Database`.

    Each range entry names its provider by index into the payload's
    ``providers`` list; empty region/service values are normalised to ``None``.

    Raises:
        ValueError: If a range refers to a provider index outside the list.
    """
    payload = msgpack.unpackb(gunzip(gz_buf), raw=False)
    provider_names = list(payload["providers"])
    provider_count = len(provider_names)
    decoded = []
    for item in payload["ranges"]:
        provider_idx = item["p"]
        if not 0 <= provider_idx < provider_count:
            raise ValueError(f"cloudip: unknown provider index {provider_idx}")
        entry = Range(
            cidr=item["cidr"],
            provider=provider_names[provider_idx],
            region=item.get("r") or None,
            service=item.get("s") or None,
        )
        decoded.append(entry)
    db_version = payload.get("version", "")
    db_build_time = int(payload.get("build_time", 0))
    return Database(
        version=db_version,
        build_time=db_build_time,
        providers=provider_names,
        ranges=decoded,
    )
cloudip/detector.py ADDED
@@ -0,0 +1,296 @@
1
+ """The Detector: loads data (network -> cache -> embedded) and answers lookups."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ from typing import Dict, List, Optional, Union
7
+
8
+ from .cache import (
9
+ cache_age_seconds,
10
+ clear_cache,
11
+ read_cache,
12
+ resolve_cache_dir,
13
+ write_cache,
14
+ )
15
+ from .constants import (
16
+ DEFAULT_DATA_URL,
17
+ DEFAULT_TTL_SECONDS,
18
+ DEFAULT_VERSION_URL,
19
+ HOUR_SECONDS,
20
+ PROVIDER_AWS,
21
+ PROVIDER_AZURE,
22
+ PROVIDER_CLOUDFLARE,
23
+ PROVIDER_DIGITALOCEAN,
24
+ PROVIDER_GCP,
25
+ PROVIDER_ORACLE,
26
+ )
27
+ from .decode import decode_database
28
+ from .embedded_loader import load_embedded_gz
29
+ from .source import fetch_data, fetch_version
30
+ from .trie import CIDRTrie
31
+ from .types import (
32
+ CheckUpdateResult,
33
+ Database,
34
+ IPEntry,
35
+ LookupResult,
36
+ Provider,
37
+ VersionInfo,
38
+ )
39
+
40
+ # Sentinel distinguishing "use default cache dir" from "caching disabled" (None).
41
+ _DEFAULT_DIR = ""
42
+
43
+
44
+ class _State:
45
+ __slots__ = ("db", "trie", "by_provider")
46
+
47
+ def __init__(self, db: Database, trie: CIDRTrie, by_provider: Dict[str, List[IPEntry]]):
48
+ self.db = db
49
+ self.trie = trie
50
+ self.by_provider = by_provider
51
+
52
+
53
def _build_state(db: Database) -> _State:
    """Build the lookup trie and per-provider index for ``db``.

    Every range is inserted into a new trie and also recorded as an
    ``IPEntry`` under its lower-cased provider name. A CIDR containing ``:`` is
    labelled ``"ipv6"``, anything else ``"ipv4"``.
    """
    lookup_trie = CIDRTrie()
    grouped: Dict[str, List[IPEntry]] = {}
    for rng in db.ranges:
        lookup_trie.insert(rng)
        family = "ipv6" if ":" in rng.cidr else "ipv4"
        bucket = grouped.setdefault(rng.provider.lower(), [])
        bucket.append(
            IPEntry(
                ip_address=rng.cidr,
                ip_type=family,
                provider=rng.provider,
                region=rng.region,
                service=rng.service,
            )
        )
    return _State(db, lookup_trie, grouped)
68
+
69
+
70
class Detector:
    """Detects which cloud provider (if any) owns a given IP address.

    Create one with :func:`new_detector` (which loads data eagerly), or construct
    directly and call :meth:`ready` before use. Data comes from the first source
    that works: network download, then on-disk cache, then the embedded database.
    """

    def __init__(
        self,
        data_dir: Optional[str] = _DEFAULT_DIR,
        auto_update_seconds: float = 0,
        offline: bool = False,
        data_url: str = DEFAULT_DATA_URL,
        version_url: str = DEFAULT_VERSION_URL,
        verify_sha256: bool = True,
        ttl_seconds: float = DEFAULT_TTL_SECONDS,
        timeout: float = 30.0,
    ) -> None:
        """Store configuration; no data is loaded until :meth:`ready` runs.

        Args:
            data_dir: Cache directory. ``None`` disables the on-disk cache; the
                default (empty-string sentinel) selects the default directory.
            auto_update_seconds: Background refresh interval. ``0`` disables
                it; positive values below one hour are raised to one hour.
            offline: When true, the network is never used.
            data_url: URL of the gzipped database.
            version_url: URL of the version metadata.
            verify_sha256: Whether downloads are checked against the metadata.
            ttl_seconds: Stored on the instance. Loading does not consult it:
                a readable cache is used regardless of its age.
            timeout: Timeout passed to the network fetch helpers.
        """
        # An auto-update interval under an hour is clamped up to one hour.
        if 0 < auto_update_seconds < HOUR_SECONDS:
            auto_update_seconds = HOUR_SECONDS
        self._data_dir = resolve_cache_dir(data_dir)
        self._auto_update_seconds = auto_update_seconds
        self._offline = offline
        self._data_url = data_url
        self._version_url = version_url
        self._verify_sha256 = verify_sha256
        self._ttl_seconds = ttl_seconds
        self._timeout = timeout

        self._state: Optional[_State] = None
        self._lock = threading.RLock()
        self._stop_event: Optional[threading.Event] = None
        self._timer_thread: Optional[threading.Thread] = None

    # -- lifecycle -------------------------------------------------------

    def ready(self) -> "Detector":
        """Load data if none is loaded yet and return ``self``.

        Also starts the background refresh thread when an auto-update interval
        is configured and the detector is not offline.
        """
        with self._lock:
            if self._state is not None:
                return self
            self._load_initial()
            if self._auto_update_seconds > 0 and not self._offline:
                self._start_auto_update()
        return self

    def close(self) -> None:
        """Signal the background refresh thread (if any) to stop."""
        if self._stop_event is not None:
            self._stop_event.set()
        self._timer_thread = None

    def __enter__(self) -> "Detector":
        return self.ready()

    def __exit__(self, *exc) -> None:
        self.close()

    def _start_auto_update(self) -> None:
        """Start a daemon thread that calls :meth:`update` every interval."""
        # Bind the event locally so the worker keeps waiting on the event it
        # was started with, without re-reading the attribute behind an assert.
        stop_event = threading.Event()
        self._stop_event = stop_event
        interval = self._auto_update_seconds

        def _run() -> None:
            # ``wait`` returns True once the event is set, which ends the loop.
            while not stop_event.wait(interval):
                try:
                    self.update()
                except Exception:
                    pass  # background refresh is best-effort

        self._timer_thread = threading.Thread(
            target=_run, name="cloudip-auto-update", daemon=True
        )
        self._timer_thread.start()

    # -- data loading ----------------------------------------------------

    def _load_initial(self) -> None:
        """Populate ``self._state`` from network, cache, or embedded data.

        Raises:
            RuntimeError: If no source yields a database.
        """
        if not self._offline:
            try:
                fresh = fetch_data(
                    self._data_url,
                    self._version_url,
                    self._verify_sha256,
                    timeout=self._timeout,
                )
                self._state = _build_state(decode_database(fresh.bytes))
                if self._data_dir:
                    write_cache(self._data_dir, fresh.version, fresh.bytes)
                return
            except Exception:
                pass  # fall through to cache / embedded

        if self._data_dir:
            cached = read_cache(self._data_dir)
            if cached:
                # A decodable cache is used regardless of its age: at this
                # point the network is unavailable (or disabled), so stale
                # cached data is preferred over the embedded fallback.
                try:
                    self._state = _build_state(decode_database(cached.bytes))
                    return
                except Exception:
                    pass  # corrupt cache: fall through to embedded data

        embedded = load_embedded_gz()
        if embedded is not None:
            self._state = _build_state(decode_database(embedded))
            return

        raise RuntimeError(
            "cloudip: no data available (network failed, no cache, no embedded data)"
        )

    def update(self) -> None:
        """Download fresh data, swap it in, and refresh the cache.

        Raises:
            RuntimeError: If the detector is in offline mode.
        """
        if self._offline:
            raise RuntimeError("cloudip: update disabled in offline mode")
        fresh = fetch_data(
            self._data_url,
            self._version_url,
            self._verify_sha256,
            timeout=self._timeout,
        )
        # Build the new state outside the lock; only the swap is locked.
        state = _build_state(decode_database(fresh.bytes))
        with self._lock:
            self._state = state
        if self._data_dir:
            write_cache(self._data_dir, fresh.version, fresh.bytes)

    def check_update(self) -> CheckUpdateResult:
        """Fetch the remote version metadata and compare it with local data.

        ``has_update`` is a plain string comparison (remote ``>`` local); with
        no data loaded the local version is ``""``.

        Raises:
            RuntimeError: If the detector is in offline mode.
        """
        if self._offline:
            raise RuntimeError("cloudip: update check disabled in offline mode")
        info = fetch_version(self._version_url, timeout=self._timeout)
        local = self._state.db.version if self._state else ""
        return CheckUpdateResult(has_update=info.version > local, info=info)

    def clear_cache(self) -> None:
        """Delete the on-disk cache for this detector's resolved directory."""
        clear_cache(self._data_dir)

    def _require_state(self) -> _State:
        """Return the loaded state, or raise if :meth:`ready` has not run."""
        if self._state is None:
            raise RuntimeError(
                "cloudip: detector not ready — call detector.ready() first"
            )
        return self._state

    # -- queries ---------------------------------------------------------

    def lookup(self, ip: str) -> LookupResult:
        """Return the matching range for ``ip``, or a not-found result."""
        r = self._require_state().trie.lookup(ip)
        if r is None:
            return LookupResult(found=False)
        return LookupResult(
            found=True,
            provider=r.provider,
            cidr=r.cidr,
            ip_type="ipv6" if ":" in r.cidr else "ipv4",
            region=r.region,
            service=r.service,
        )

    def get_provider(self, ip: str) -> Provider:
        """Return the provider name for ``ip``, or ``""`` when there is none."""
        return self.lookup(ip).provider or ""

    def is_cloud_provider(self, ip: str) -> bool:
        """Return whether ``ip`` falls inside any known range."""
        return self.lookup(ip).found

    def is_aws(self, ip: str) -> bool:
        return self.get_provider(ip) == PROVIDER_AWS

    def is_gcp(self, ip: str) -> bool:
        return self.get_provider(ip) == PROVIDER_GCP

    def is_azure(self, ip: str) -> bool:
        return self.get_provider(ip) == PROVIDER_AZURE

    def is_cloudflare(self, ip: str) -> bool:
        return self.get_provider(ip) == PROVIDER_CLOUDFLARE

    def is_digitalocean(self, ip: str) -> bool:
        return self.get_provider(ip) == PROVIDER_DIGITALOCEAN

    def is_oracle(self, ip: str) -> bool:
        return self.get_provider(ip) == PROVIDER_ORACLE

    def get_ips(
        self, providers: Optional[Union[Provider, List[Provider]]] = None
    ) -> List[IPEntry]:
        """Return the entries for the requested providers.

        ``None`` returns every entry. A single name or a list of names is
        matched case-insensitively; unknown names contribute nothing.
        """
        s = self._require_state()
        if providers is None:
            out: List[IPEntry] = []
            for arr in s.by_provider.values():
                out.extend(arr)
            return out
        want = [providers] if isinstance(providers, str) else list(providers)
        out = []
        for p in want:
            hit = s.by_provider.get(p.lower())
            if hit:
                out.extend(hit)
        return out

    def version(self) -> str:
        """Return the loaded data version, or ``""`` when nothing is loaded."""
        return self._state.db.version if self._state else ""

    def build_time(self) -> int:
        """Return the loaded data's build time, or ``0`` when nothing is loaded."""
        return self._state.db.build_time if self._state else 0

    def range_count(self) -> int:
        """Return the number of loaded ranges, or ``0`` when nothing is loaded."""
        return len(self._state.db.ranges) if self._state else 0

    def providers(self) -> List[Provider]:
        """Return a copy of the loaded provider list (empty when nothing is loaded)."""
        return list(self._state.db.providers) if self._state else []
286
+
287
+
288
def new_detector(**options) -> Detector:
    """Build a :class:`Detector` from ``options`` and load its data right away."""
    detector = Detector(**options)
    return detector.ready()
291
+
292
+
293
def load_version(
    version_url: str = DEFAULT_VERSION_URL, timeout: float = 30.0
) -> VersionInfo:
    """Fetch version metadata from ``version_url`` via ``fetch_version``."""
    info = fetch_version(version_url, timeout=timeout)
    return info
cloudip/embedded.py ADDED
@@ -0,0 +1,90 @@
1
+ """Offline-only entry point: uses the bundled database and never touches the network.
2
+
3
+ Mirrors :mod:`cloudip` but the default detector is created with ``offline=True``
4
+ and no on-disk cache, so it works in air-gapped environments::
5
+
6
+ from cloudip import embedded
7
+ embedded.is_aws("52.94.76.1") # uses bundled data only
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import threading
13
+ import time
14
+ from typing import List, Optional, Union
15
+
16
+ from .detector import Detector, new_detector
17
+ from .types import IPEntry, LookupResult, Provider
18
+
19
+ _default: Optional[Detector] = None
20
+ _default_lock = threading.Lock()
21
+
22
+
23
def _get_default() -> Detector:
    """Return the shared offline detector, building it on first use.

    The detector is created with ``offline=True`` and ``data_dir=None`` while
    ``_default_lock`` is held; later callers get the existing instance.
    """
    global _default
    detector = _default
    if detector is None:
        with _default_lock:
            # Re-check under the lock: another thread may have built it already.
            if _default is None:
                _default = new_detector(offline=True, data_dir=None)
            detector = _default
    return detector
31
+
32
+
33
def lookup(ip: str) -> LookupResult:
    """Reverse-look-up ``ip`` using the shared offline detector."""
    detector = _get_default()
    return detector.lookup(ip)
35
+
36
+
37
def get_provider(ip: str) -> Provider:
    """Return the provider name for ``ip`` as reported by the offline detector."""
    detector = _get_default()
    return detector.get_provider(ip)
39
+
40
+
41
def is_cloud_provider(ip: str) -> bool:
    """Return whether the offline detector finds ``ip`` in any known range."""
    detector = _get_default()
    return detector.is_cloud_provider(ip)
43
+
44
+
45
def is_aws(ip: str) -> bool:
    """Return whether the offline detector attributes ``ip`` to AWS."""
    detector = _get_default()
    return detector.is_aws(ip)
47
+
48
+
49
def is_gcp(ip: str) -> bool:
    """Return whether the offline detector attributes ``ip`` to GCP."""
    detector = _get_default()
    return detector.is_gcp(ip)
51
+
52
+
53
def is_azure(ip: str) -> bool:
    """Return whether the offline detector attributes ``ip`` to Azure."""
    detector = _get_default()
    return detector.is_azure(ip)
55
+
56
+
57
def is_cloudflare(ip: str) -> bool:
    """Return whether the offline detector attributes ``ip`` to Cloudflare."""
    detector = _get_default()
    return detector.is_cloudflare(ip)
59
+
60
+
61
def is_digitalocean(ip: str) -> bool:
    """Return whether the offline detector attributes ``ip`` to DigitalOcean."""
    detector = _get_default()
    return detector.is_digitalocean(ip)
63
+
64
+
65
def is_oracle(ip: str) -> bool:
    """Return whether the offline detector attributes ``ip`` to Oracle Cloud."""
    detector = _get_default()
    return detector.is_oracle(ip)
67
+
68
+
69
def get_ips(
    providers: Optional[Union[Provider, List[Provider]]] = None
) -> List[IPEntry]:
    """Return the offline detector's entries for ``providers``.

    ``providers`` is passed through unchanged: a single name, a list of names,
    or ``None`` to request every provider.
    """
    detector = _get_default()
    return detector.get_ips(providers)
73
+
74
+
75
def version() -> str:
    """Return the data version held by the offline detector."""
    detector = _get_default()
    return detector.version()
77
+
78
+
79
def range_count() -> int:
    """Return how many ranges the offline detector has loaded."""
    detector = _get_default()
    return detector.range_count()
81
+
82
+
83
def providers() -> List[Provider]:
    """Return the provider names known to the offline detector."""
    detector = _get_default()
    return detector.providers()
85
+
86
+
87
def age_days() -> float:
    """Age of the bundled data in days, based on its build time.

    Computed as the difference between the current time and the detector's
    ``build_time()``, divided by 86400 seconds per day.
    """
    built_at = _get_default().build_time()
    elapsed_seconds = time.time() - built_at
    return elapsed_seconds / 86400
@@ -0,0 +1,28 @@
1
+ """Locate and read the embedded fallback database shipped inside the package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ _DATA_FILE = "cloudip.msgpack.gz"
9
+
10
+
11
def load_embedded_gz() -> Optional[bytes]:
    """Return the bytes of the embedded database, or ``None`` if not found.

    Looks for the data file next to this module, first in a ``data``
    sub-directory and then directly alongside it. If neither can be read, it
    falls back to ``importlib.resources`` for zip/installed layouts.
    """
    package_dir = Path(__file__).resolve().parent
    candidates = (
        package_dir / "data" / _DATA_FILE,
        package_dir / _DATA_FILE,
    )
    for candidate in candidates:
        try:
            return candidate.read_bytes()
        except OSError:
            continue
    # Fall back to importlib.resources for zip/installed layouts.
    try:
        # ``files`` was added in Python 3.9. On 3.8 this raises a plain
        # ImportError (not ModuleNotFoundError), so the handler below catches
        # ImportError, which covers both.
        from importlib.resources import files

        # Chain single-argument joinpath calls: multi-argument joinpath is not
        # supported by every Traversable implementation.
        resource = files("cloudip").joinpath("data").joinpath(_DATA_FILE)
        return resource.read_bytes()
    except (OSError, ImportError, AttributeError):
        return None
cloudip/source.py ADDED
@@ -0,0 +1,56 @@
1
+ """Network fetching of version metadata and the database, with SHA-256 verification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import urllib.request
7
+ from typing import NamedTuple, Optional
8
+
9
+ from .constants import DEFAULT_DATA_URL, DEFAULT_VERSION_URL
10
+ from .decode import gunzip, sha256_hex
11
+ from .types import VersionInfo
12
+
13
+ _USER_AGENT = "py-cloudip"
14
+
15
+
16
def _get(url: str, timeout: float) -> bytes:
    """GET ``url`` with the package User-Agent and return the response body.

    Raises:
        OSError: If the response reports an HTTP status of 400 or above.
    """
    request = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
    with urllib.request.urlopen(request, timeout=timeout) as response:
        # Some response objects have no ``status``; treat that as success.
        code = getattr(response, "status", 200)
        if code and code >= 400:
            raise OSError(f"cloudip: HTTP {code} from {url}")
        body = response.read()
    return body
23
+
24
+
25
def fetch_version(
    version_url: str = DEFAULT_VERSION_URL, timeout: float = 30.0
) -> VersionInfo:
    """Download ``version_url`` and parse its UTF-8 JSON into a ``VersionInfo``."""
    raw = _get(version_url, timeout)
    fields = json.loads(raw.decode("utf-8"))
    return VersionInfo.from_dict(fields)
30
+
31
+
32
class FetchedData(NamedTuple):
    """A downloaded database payload and the version reported for it."""

    bytes: bytes  # the gzipped database exactly as downloaded
    version: str  # version string taken from the version metadata
35
+
36
+
37
def fetch_data(
    data_url: str = DEFAULT_DATA_URL,
    version_url: str = DEFAULT_VERSION_URL,
    verify_sha256: bool = True,
    verify_against: Optional[VersionInfo] = None,
    timeout: float = 30.0,
) -> FetchedData:
    """Download the gzipped database and verify its SHA-256 against version.json.

    Args:
        data_url: URL of the gzipped database.
        version_url: URL of the version metadata; fetched only when
            ``verify_against`` is not supplied.
        verify_sha256: When true, the SHA-256 of the decompressed payload must
            equal the ``sha256`` field of the version metadata.
        verify_against: Already-fetched version metadata to use instead of
            downloading it.
        timeout: Timeout for each request.

    Returns:
        The gzipped bytes as downloaded, with the metadata's version string.

    Raises:
        ValueError: If verification is enabled and the digests differ.
    """
    gz = _get(data_url, timeout)
    info = verify_against
    if info is None:
        # Always needed, even without verification: the returned version
        # string comes from the metadata.
        info = fetch_version(version_url, timeout)
    if verify_sha256:
        digest = sha256_hex(gunzip(gz))
        if digest != info.sha256:
            raise ValueError(
                f"cloudip: sha256 mismatch (expected {info.sha256}, got {digest})"
            )
    return FetchedData(bytes=gz, version=info.version)
cloudip/trie.py ADDED
@@ -0,0 +1,79 @@
1
+ """A binary (Patricia-style) trie over CIDR prefixes for longest-prefix matching.
2
+
3
+ Separate tries are kept for IPv4 (32-bit) and IPv6 (128-bit) addresses. ``lookup``
4
+ descends the trie bit by bit, remembering the most specific (longest-prefix)
5
+ range seen along the path — matching the behaviour of go-cloudip / js-cloudip.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import ipaddress
11
+ from typing import Optional
12
+
13
+ from .types import Range
14
+
15
+
16
class _Node:
    """One node of the binary trie: two optional children and an optional range."""

    __slots__ = ("zero", "one", "range")

    def __init__(self) -> None:
        # Child followed when the next address bit is 0 / 1; absent on a new node.
        self.zero: Optional[_Node] = None
        self.one: Optional[_Node] = None
        # Range stored at exactly this prefix, if one was inserted here.
        self.range: Optional[Range] = None
23
+
24
+
25
+ def _parse_network(cidr: str):
26
+ """Return an ip_network for ``cidr`` (host bits allowed), or None if invalid."""
27
+ try:
28
+ return ipaddress.ip_network(cidr, strict=False)
29
+ except ValueError:
30
+ return None
31
+
32
+
33
class CIDRTrie:
    """Bitwise trie over CIDR prefixes, with separate IPv4 and IPv6 roots."""

    def __init__(self) -> None:
        self._v4 = _Node()
        self._v6 = _Node()

    def insert(self, rng: Range) -> None:
        """Store ``rng`` at the node for its prefix; invalid CIDRs are ignored.

        If a range is already stored at that exact prefix, it is kept and the
        new one is dropped.
        """
        network = _parse_network(rng.cidr)
        if network is None:
            return
        if network.version == 4:
            width, cursor = 32, self._v4
        else:
            width, cursor = 128, self._v6
        address_bits = int(network.network_address)
        # Walk the prefix bits from most significant down, creating nodes as needed.
        for depth in range(network.prefixlen):
            if (address_bits >> (width - 1 - depth)) & 1:
                if cursor.one is None:
                    cursor.one = _Node()
                cursor = cursor.one
            else:
                if cursor.zero is None:
                    cursor.zero = _Node()
                cursor = cursor.zero
        # Keep the first range inserted at a given prefix (stable, like the JS port).
        if cursor.range is None:
            cursor.range = rng

    def lookup(self, ip: str) -> Optional[Range]:
        """Return the longest-prefix range containing ``ip``, or ``None``.

        An unparsable address yields ``None``.
        """
        try:
            address = ipaddress.ip_address(ip)
        except ValueError:
            return None
        if address.version == 4:
            width, cursor = 32, self._v4
        else:
            width, cursor = 128, self._v6
        address_bits = int(address)
        best = cursor.range
        shift = width - 1
        # Descend one bit at a time; the deepest node holding a range wins.
        while cursor is not None and shift >= 0:
            cursor = cursor.one if (address_bits >> shift) & 1 else cursor.zero
            if cursor is not None and cursor.range is not None:
                best = cursor.range
            shift -= 1
        return best
cloudip/types.py ADDED
@@ -0,0 +1,126 @@
1
+ """Typed data structures used across the library."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional
7
+
8
+ Provider = str
9
+
10
+
11
@dataclass(frozen=True)
class Range:
    """A single CIDR block belonging to a provider.

    Instances are immutable (``frozen=True``).
    """

    cidr: str  # the block in CIDR notation
    provider: Provider  # name of the owning provider
    region: Optional[str] = None  # region, when the database supplies one
    service: Optional[str] = None  # service, when the database supplies one
19
+
20
+
21
@dataclass
class IPEntry:
    """A forward-lookup entry returned by ``get_ips``."""

    ip_address: str
    ip_type: str  # "ipv4" | "ipv6"
    provider: Provider
    region: Optional[str] = None
    service: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain dict, leaving out ``region``/``service`` when empty."""
        payload = {
            "ip_address": self.ip_address,
            "ip_type": self.ip_type,
            "provider": self.provider,
        }
        for key in ("region", "service"):
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload
42
+
43
+
44
@dataclass
class LookupResult:
    """The result of a reverse lookup of an IP address."""

    found: bool
    provider: Optional[Provider] = None
    region: Optional[str] = None
    service: Optional[str] = None
    cidr: Optional[str] = None
    ip_type: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialise omitting absent fields, mirroring js-cloudip's JSON output.

        A miss serialises to just ``{"found": False}``.
        """
        if self.found:
            payload = {
                "found": True,
                "provider": self.provider,
                "cidr": self.cidr,
                "ip_type": self.ip_type,
            }
            for key in ("region", "service"):
                value = getattr(self, key)
                if value:
                    payload[key] = value
            return payload
        return {"found": False}
70
+
71
+
72
@dataclass
class VersionInfo:
    """Contents of cloudip-db's ``version.json``."""

    version: str
    build_time: int
    sha256: str
    ranges: int
    size: int
    size_gzip: int

    @classmethod
    def from_dict(cls, d: dict) -> "VersionInfo":
        """Build an instance from a parsed JSON object.

        Missing string fields default to ``""`` and missing numeric fields to
        ``0``; numeric fields are passed through ``int``.
        """

        def _as_int(key: str) -> int:
            return int(d.get(key, 0))

        return cls(
            version=d.get("version", ""),
            build_time=_as_int("build_time"),
            sha256=d.get("sha256", ""),
            ranges=_as_int("ranges"),
            size=_as_int("size"),
            size_gzip=_as_int("size_gzip"),
        )

    def to_dict(self) -> dict:
        """Return the fields as a plain dict, in declaration order."""
        names = ("version", "build_time", "sha256", "ranges", "size", "size_gzip")
        return {name: getattr(self, name) for name in names}
103
+
104
+
105
@dataclass
class Database:
    """The decoded database."""

    version: str  # version string of the data
    build_time: int  # build time of the data (presumably Unix seconds — confirm)
    providers: list = field(default_factory=list)  # provider names
    ranges: list = field(default_factory=list)  # the CIDR range entries
113
+
114
+
115
@dataclass
class CheckUpdateResult:
    """Result of ``check_update``."""

    has_update: bool
    info: Optional[VersionInfo] = None

    def to_dict(self) -> dict:
        """Return a plain dict; ``info`` is included only when it is present."""
        if self.info is None:
            return {"has_update": self.has_update}
        return {"has_update": self.has_update, "info": self.info.to_dict()}
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-cloudip
3
+ Version: 0.1.0
4
+ Summary: Fast cloud-provider IP detection (AWS, GCP, Azure, Cloudflare, DigitalOcean, Oracle)
5
+ Project-URL: Homepage, https://github.com/rezmoss/py-cloudip
6
+ Project-URL: Database, https://github.com/rezmoss/cloudip-db
7
+ Author: Rez Moss
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: aws,azure,cidr,cloud,cloudflare,gcp,geoip,ip
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Internet
15
+ Classifier: Topic :: System :: Networking
16
+ Requires-Python: >=3.8
17
+ Requires-Dist: msgpack>=1.0
18
+ Description-Content-Type: text/markdown
19
+
20
+ # py-cloudip
21
+
22
+ Fast cloud-provider IP detection for Python. Identify whether an IP address
23
+ belongs to **AWS, GCP, Azure, Cloudflare, DigitalOcean, or Oracle Cloud** via
24
+ longest-prefix-match lookups over a Patricia/binary trie.
25
+
26
+ A Python port of [js-cloudip](https://github.com/rezmoss/js-cloudip) and
27
+ [go-cloudip](https://github.com/rezmoss/go-cloudip), backed by the daily-updated
28
+ [cloudip-db](https://github.com/rezmoss/cloudip-db) MessagePack database.
29
+
30
+ - **Fast** — binary trie, IPv4 + IPv6, longest-prefix match.
31
+ - **Auto-updating** — fetches fresh data from `cloudip-db` with SHA-256 verification.
32
+ - **Offline-capable** — file cache plus an embedded database bundled in the wheel.
33
+ - **Zero config** — works on first call; one tiny dependency (`msgpack`).
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install py-cloudip
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ```python
44
+ import cloudip
45
+
46
+ cloudip.is_aws("52.94.76.1") # True
47
+ cloudip.get_provider("34.64.0.1") # "gcp"
48
+ cloudip.is_cloud_provider("1.1.1.1") # True
49
+
50
+ r = cloudip.lookup("52.94.76.1")
51
+ # LookupResult(found=True, provider="aws", region="us-east-1", service="EC2",
52
+ # cidr="52.94.76.0/22", ip_type="ipv4")
53
+ r.to_dict()
54
+ # {"found": True, "provider": "aws", "cidr": "52.94.76.0/22",
55
+ # "ip_type": "ipv4", "region": "us-east-1", "service": "EC2"}
56
+
57
+ # Forward lookup: every CIDR for one or more providers
58
+ cloudip.get_ips("cloudflare") # list[IPEntry]
59
+ cloudip.get_ips(["aws", "gcp"])
60
+ ```
61
+
62
+ ### Provider checks
63
+
64
+ `is_aws`, `is_gcp`, `is_azure`, `is_cloudflare`, `is_digitalocean`, `is_oracle`,
65
+ `is_cloud_provider`.
66
+
67
+ ### Metadata & updates
68
+
69
+ ```python
70
+ cloudip.version() # "2026-06-05"
71
+ cloudip.range_count() # 124455
72
+ cloudip.providers() # ["aws", "gcp", "cloudflare", "azure", "digitalocean", "oracle"]
73
+ cloudip.check_update() # CheckUpdateResult(has_update=..., info=VersionInfo(...))
74
+ cloudip.update() # force refresh
75
+ cloudip.clear_cache()
76
+ ```
77
+
78
+ ## Custom detector
79
+
80
+ ```python
81
+ detector = cloudip.new_detector(
82
+ data_dir="./cache", # None disables file caching; "" = default ~/.cache/py-cloudip
83
+ auto_update_seconds=86400, # background refresh (min 1h); 0 disables
84
+ offline=False, # air-gapped mode
85
+ verify_sha256=True,
86
+ ttl_seconds=86400,
87
+ )
88
+ detector.lookup("52.94.76.1")
89
+ detector.close() # stop the background updater
90
+ # or: with cloudip.Detector(...) as d: ...
91
+ ```
92
+
93
+ ## Offline / air-gapped
94
+
95
+ The `cloudip.embedded` module never touches the network — it uses only the
96
+ database bundled in the package:
97
+
98
+ ```python
99
+ from cloudip import embedded
100
+ embedded.is_aws("52.94.76.1") # uses bundled data only
101
+ embedded.age_days() # how old the bundled data is
102
+ ```
103
+
104
+ ## CLI
105
+
106
+ ```bash
107
+ cloudip lookup 52.94.76.1
108
+ cloudip provider 34.64.0.1
109
+ cloudip get cloudflare
110
+ cloudip get aws,gcp
111
+ cloudip providers
112
+ cloudip version
113
+ cloudip check-update
114
+ cloudip update
115
+ cloudip clear-cache
116
+ ```
117
+
118
+ (Also runnable as `python -m cloudip`.)
119
+
120
+ ## How it works
121
+
122
+ 1. Fetch `version.json` + `cloudip.msgpack.gz` from `cloudip-db`.
123
+ 2. Verify the SHA-256 of the decompressed MessagePack against `version.json`.
124
+ 3. Decode and build per-protocol tries for sub-millisecond lookups.
125
+ 4. On network failure, fall back to the on-disk cache, then the embedded database.
126
+
127
+ ## License
128
+
129
+ MIT
@@ -0,0 +1,18 @@
1
+ cloudip/__init__.py,sha256=88cR58DfEMdmY8YdIf5xPplfVJFhJbIWiY7TDnt3Voo,3459
2
+ cloudip/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
3
+ cloudip/cache.py,sha256=gNA3Jb6gXECxZSxkStme8Sp4-5gyTHGw72ns0XL4hxY,1971
4
+ cloudip/cli.py,sha256=mnB2uZDwcgJ-zoHV6GnKhnf_SQrzpw4AoV6NfDKqDt4,2940
5
+ cloudip/constants.py,sha256=g2i85KKfpoj7mOz7Vcu0YTKy5foyxvcuyS7s65YBzLc,510
6
+ cloudip/decode.py,sha256=pJFhyuKq8M3oHAHvQ55uzGgNxkSjV1HCsB4f-w9hiQA,1139
7
+ cloudip/detector.py,sha256=69e9v96bu2l9dSJ1NtyuhNupDCQcKDLT2j4afIa32Ac,9498
8
+ cloudip/embedded.py,sha256=ygKNkuH1XVXetmUgOLYDFs9f_ZZa5_IVfjlwO--VlZE,2064
9
+ cloudip/embedded_loader.py,sha256=s5vs2vpSoeybAGeHGEsvzxeALWBQeN0CulDp_EvPi-E,822
10
+ cloudip/source.py,sha256=E-bRpqIn-cOyOqxMHAO86-6pge9G3UCTiPgk0O22rtE,1778
11
+ cloudip/trie.py,sha256=xO8OWzErHj05cQTdQh8SgWhYIvAriEVFFQ7xvtTUzv8,2501
12
+ cloudip/types.py,sha256=rwwB4LiB5pLT38BvweP8K-yEjcFWu6w3binOQLP9W6o,3049
13
+ cloudip/data/cloudip.msgpack.gz,sha256=6xY4I2ga8IHy-rBw91pKzVbVt6OStIkMfOJyU4FP8uc,759422
14
+ py_cloudip-0.1.0.dist-info/METADATA,sha256=Z39eZy_6-MVpA1bQY1hB6EQw-BMh8eD9jAIQ_xs9gMw,3954
15
+ py_cloudip-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
16
+ py_cloudip-0.1.0.dist-info/entry_points.txt,sha256=7NmE7dtFGOsNittGAfyj2Ugl1IRQWc5r_D51LnSLJ9k,45
17
+ py_cloudip-0.1.0.dist-info/licenses/LICENSE,sha256=G3bni7VqsewOml7-B0pMnTufsambmnDHyYLmEmFhmSA,1065
18
+ py_cloudip-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cloudip = cloudip.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rez Moss
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.