os-normalizer 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of os-normalizer might be problematic. Click here for more details.
- os_normalizer/__init__.py +10 -0
- os_normalizer/constants.py +87 -0
- os_normalizer/cpe.py +265 -0
- os_normalizer/helpers.py +107 -0
- os_normalizer/models.py +159 -0
- os_normalizer/os_normalizer.py +313 -0
- os_normalizer/parsers/__init__.py +16 -0
- os_normalizer/parsers/bsd.py +69 -0
- os_normalizer/parsers/linux.py +121 -0
- os_normalizer/parsers/macos.py +111 -0
- os_normalizer/parsers/mobile.py +37 -0
- os_normalizer/parsers/network/__init__.py +61 -0
- os_normalizer/parsers/network/cisco.py +96 -0
- os_normalizer/parsers/network/fortinet.py +56 -0
- os_normalizer/parsers/network/huawei.py +38 -0
- os_normalizer/parsers/network/juniper.py +42 -0
- os_normalizer/parsers/network/netgear.py +41 -0
- os_normalizer/parsers/windows.py +197 -0
- os_normalizer-0.3.2.dist-info/METADATA +172 -0
- os_normalizer-0.3.2.dist-info/RECORD +22 -0
- os_normalizer-0.3.2.dist-info/WHEEL +4 -0
- os_normalizer-0.3.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Constants and static lookup tables for the OS fingerprinting package."""
|
|
2
|
+
|
|
3
|
+
# Architecture synonyms
|
|
4
|
+
# Flat alias -> canonical architecture lookup.  It is written as
# (canonical, aliases) groups and expanded, so every spelling of one
# architecture sits on a single line.
ARCH_SYNONYMS = {
    alias: canonical
    for canonical, aliases in (
        ("x86_64", ("x64", "x86_64", "amd64")),
        ("x86", ("x86", "i386", "i686")),
        ("arm64", ("aarch64", "arm64", "armv8")),
        ("arm", ("armv7", "armv7l")),
        ("ppc64le", ("ppc64le",)),
    )
    for alias in aliases
}
|
|
18
|
+
|
|
19
|
+
# Windows build map (build number range -> product name, marketing channel)
|
|
20
|
+
# Each entry is (first_build, last_build, product name, channel label).
# Single-build releases repeat the same number for both bounds, so the range
# is presumably inclusive -- confirm against the consumer of this table.
# Rows are grouped per product and flattened into one list of 4-tuples.
WINDOWS_BUILD_MAP = [
    (first_build, last_build, product_name, channel_label)
    for product_name, releases in (
        (
            "Windows 10",
            (
                (10240, 10240, "1507"),
                (10586, 10586, "1511"),
                (14393, 14393, "1607"),
                (15063, 15063, "1703"),
                (16299, 16299, "1709"),
                (17134, 17134, "1803"),
                (17763, 17763, "1809"),
                (18362, 18363, "1903/1909"),
                (19041, 19045, "2004/20H2/21H1/21H2/22H2"),
            ),
        ),
        (
            "Windows 11",
            (
                (22000, 22000, "21H2"),
                (22621, 22630, "22H2"),
                (22631, 25999, "23H2"),
                (26100, 26199, "24H2"),
            ),
        ),
    )
    for first_build, last_build, channel_label in releases
]
|
|
37
|
+
|
|
38
|
+
# Windows NT version tuple -> client product (ambiguous NT 6.x split out)
|
|
39
|
+
# Keyed by the (major, minor) NT version tuple; values are client product names.
WINDOWS_NT_CLIENT_MAP = {
    (nt_major, nt_minor): client_name
    for nt_major, nt_minor, client_name in (
        (4, 0, "Windows NT 4.0"),
        (5, 0, "Windows 2000"),
        (5, 1, "Windows XP"),
        # NT 5.2 often maps to XP x64 on client
        (5, 2, "Windows XP x64/Server 2003"),
        (6, 0, "Windows Vista"),
        (6, 1, "Windows 7"),
        (6, 2, "Windows 8"),
        (6, 3, "Windows 8.1"),
        (10, 0, "Windows 10/11"),
    )
}
|
|
50
|
+
|
|
51
|
+
# Windows NT version tuple -> server product
|
|
52
|
+
# Keyed by the (major, minor) NT version tuple; values are server product names.
# There is deliberately no (10, 0) entry: Server 2016/2019/2022 are detected
# via explicit names, not NT mapping.
WINDOWS_NT_SERVER_MAP = {
    (nt_major, nt_minor): server_name
    for nt_major, nt_minor, server_name in (
        (4, 0, "Windows NT 4.0 Server"),
        (5, 0, "Windows 2000 Server"),
        # rarely used for server detection
        (5, 1, "Windows XP/Server 2003"),
        (5, 2, "Windows Server 2003"),
        (6, 0, "Windows Server 2008"),
        (6, 1, "Windows Server 2008 R2"),
        (6, 2, "Windows Server 2012"),
        (6, 3, "Windows Server 2012 R2"),
    )
}
|
|
63
|
+
|
|
64
|
+
# Human readable aliases (macOS codenames)
|
|
65
|
+
# Lower-case macOS codename -> "macOS <version>" label.  "big sur" is listed
# both with and without the space.
MACOS_ALIASES = {
    codename: marketing_name
    for codename, marketing_name in (
        ("sonoma", "macOS 14"),
        ("sequoia", "macOS 15"),
        ("ventura", "macOS 13"),
        ("monterey", "macOS 12"),
        ("big sur", "macOS 11"),
        ("bigsur", "macOS 11"),
        ("catalina", "macOS 10.15"),
    )
}
|
|
74
|
+
|
|
75
|
+
# macOS Darwin major version -> (product name, product version, codename)
|
|
76
|
+
# Darwin major version -> (product name, product version, codename).
MACOS_DARWIN_MAP = {
    darwin_major: ("macOS", release, codename)
    for darwin_major, release, codename in (
        (19, "10.15", "Catalina"),
        (20, "11", "Big Sur"),
        (21, "12", "Monterey"),
        (22, "13", "Ventura"),
        (23, "14", "Sonoma"),
        (24, "15", "Sequoia"),
    )
}
|
|
84
|
+
|
|
85
|
+
# Cisco train names (used for codename detection)
|
|
86
|
+
# Set of release-train names, kept in their capitalised spelling.
CISCO_TRAIN_NAMES = set("Everest Fuji Gibraltar Amsterdam Denali".split())
|
|
87
|
+
|
os_normalizer/cpe.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""CPE 2.3 generation utilities for OSData.
|
|
2
|
+
|
|
3
|
+
This aims for pragmatic correctness for common operating systems based on
|
|
4
|
+
the fields present in OSData. Exact dictionary matching for all vendors
|
|
5
|
+
is out of scope; instead we provide a curated mapping for popular OSes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from .models import OSData
|
|
14
|
+
|
|
15
|
+
# Characters that receive a backslash prefix inside a CPE component.
_ESCAPE_CHARS = set("\\: ?*(){}[]!\"#$%&'+,/:;<=>@^`|~")


def _escape(s: str | None) -> str:
    """Turn one attribute value into a CPE 2.3 component string.

    ``None`` becomes ``"*"`` and the empty string becomes ``"-"``.  A value
    that already is ``"*"`` or ``"-"`` is passed through untouched.  In any
    other value, every character listed in ``_ESCAPE_CHARS`` is prefixed
    with a backslash.
    """
    if s is None:
        return "*"
    if s == "":
        return "-"
    if s == "*" or s == "-":
        # Wildcard / N/A tokens are used verbatim in CPE.
        return s
    return "".join(f"\\{ch}" if ch in _ESCAPE_CHARS else ch for ch in s)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _map_vendor_product(p: OSData) -> tuple[str, str, str]:
    """Choose the CPE vendor and product tokens for *p*.

    Returns a ``(vendor_token, product_token, strategy)`` triple, where
    ``strategy`` is the label used to pick the version/update formatting
    rules.  Matching is done on the lower-cased ``family``, ``vendor``,
    ``product`` and ``distro`` attributes.
    """
    family = (p.family or "").lower()
    vendor = p.vendor.lower() if p.vendor else None
    product = p.product.lower() if p.product else None

    # Windows
    if family == "windows":
        windows_tokens = {
            "windows 7": "windows_7",
            "windows 8.1": "windows_8.1",
            "windows 8": "windows_8",
            "windows 10": "windows_10",
            "windows 11": "windows_11",
            "windows server 2008": "windows_server_2008",
            "windows server 2008 r2": "windows_server_2008_r2",
            "windows server 2012": "windows_server_2012",
            "windows server 2012 r2": "windows_server_2012_r2",
            "windows server 2016": "windows_server_2016",
            "windows server 2019": "windows_server_2019",
            "windows server 2022": "windows_server_2022",
        }
        token = windows_tokens.get(product, "windows")
        # Only Windows 10/11 get the channel appended (e.g. _23h2).
        if p.channel and token in ("windows_10", "windows_11"):
            token = f"{token}_{p.channel.lower()}"
        return "microsoft", token, "windows"

    # macOS
    if family == "macos":
        return "apple", "macos", "macos"

    # Linux: the distro id decides; unknown distros use vendor/product as-is.
    if family == "linux":
        distro = (p.distro or "").lower()
        redhat = ("redhat", "enterprise_linux", "rhel")
        opensuse = ("suse", "opensuse", "opensuse")
        amazon = ("amazon", "amazon_linux", "amazon")
        by_distro = {
            "ubuntu": ("canonical", "ubuntu_linux", "ubuntu"),
            "debian": ("debian", "debian_linux", "debian"),
            "rhel": redhat,
            "redhat": redhat,
            "red_hat": redhat,
            # Only the exact id "sles" maps to SLES; other SUSE ids are openSUSE.
            "sles": ("suse", "linux_enterprise_server", "sles"),
            "suse": opensuse,
            "opensuse": opensuse,
            "fedora": ("fedoraproject", "fedora", "fedora"),
            "amzn": amazon,
            "amazon": amazon,
        }
        if distro in by_distro:
            return by_distro[distro]
        return vendor or "linux", product or "linux", "linux"

    # BSDs: substring match on the product name, checked in a fixed order.
    if family == "bsd":
        if product:
            for flavour in ("freebsd", "openbsd", "netbsd"):
                if flavour in product:
                    return flavour, flavour, flavour
        return vendor or "bsd", product or "bsd", "bsd"

    # Network OS
    if family == "network-os":
        if vendor == "cisco" and product:
            if "ios xe" in product or "ios-xe" in product:
                return "cisco", "ios_xe", "ios_xe"
            if "nx-os" in product or "nxos" in product:
                return "cisco", "nx-os", "nx_os"
            # Any other Cisco product drops through to the generic return.
        by_vendor = {
            "juniper": ("juniper", "junos", "junos"),
            "fortinet": ("fortinet", "fortios", "fortios"),
            "huawei": ("huawei", "vrp", "vrp"),
            "netgear": ("netgear", "firmware", "firmware"),
        }
        if vendor in by_vendor:
            return by_vendor[vendor]
        generic_product = (product or "firmware").replace(" ", "_")
        return vendor or "network", generic_product, "firmware"

    # Mobile
    mobile = {
        "android": ("google", "android", "android"),
        "ios": ("apple", "iphone_os", "ios"),
    }
    if family in mobile:
        return mobile[family]

    # Fallback: reuse whatever identity is available.
    fallback_product = (product or (family or "unknown")).replace(" ", "_")
    return (vendor or family or "unknown"), fallback_product, family or "unknown"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _join_version_parts(maj: int | None, minr: int | None, pat: int | None) -> str:
    """Render ``major[.minor[.patch]]``, or ``"*"`` when the major is unknown.

    A patch number is only emitted when the minor number is known as well.
    """
    if maj is None:
        return "*"
    if minr is None:
        return f"{maj}"
    if pat is None:
        return f"{maj}.{minr}"
    return f"{maj}.{minr}.{pat}"


def _fmt_version(p: OSData, strategy: str) -> tuple[str, str, str]:
    """Return ``(version, update, edition)`` strings for the given strategy.

    ``strategy`` selects which formatting rules apply.  ``update`` is always
    ``"*"``.  ``edition`` is ``"*"`` except for the ``ios_xe`` / ``nx_os`` /
    ``junos`` strategies, which emit the lower-cased ``p.edition`` when set.
    ``version`` is ``"*"`` whenever the fields a strategy needs are missing.
    """
    maj, minr, pat = p.version_major, p.version_minor, p.version_patch
    build = p.version_build
    edition = p.edition.lower() if p.edition else None

    if strategy == "windows":
        # Product may include channel; version should reflect NT kernel + build when known.
        if p.kernel_version:
            ver = p.kernel_version
        elif build:
            nt = p.evidence.get("nt_version") if isinstance(p.evidence, dict) else None
            ver = f"{nt}.{build}" if nt else build
        else:
            ver = "*"
        return ver, "*", "*"

    if strategy == "ubuntu":
        # Minor is zero-padded to two digits (22.4 -> "22.04"); a missing minor becomes ".00".
        if maj is None:
            ver = "*"
        elif minr is None:
            ver = f"{maj}.00"
        else:
            ver = f"{maj}.{minr:02d}"
        return ver, "*", "*"

    if strategy in ("debian", "rhel", "sles", "opensuse", "fedora", "amazon"):
        # Only opensuse keeps the minor component; the others use the major alone.
        if maj is None:
            ver = "*"
        elif strategy == "opensuse" and minr is not None:
            ver = f"{maj}.{minr}"
        else:
            ver = str(maj)
        return ver, "*", "*"

    if strategy == "macos":
        # A missing minor is rendered as 0 (14 -> "14.0").
        ver = f"{maj}.{minr if minr is not None else 0}" if maj is not None else "*"
        return ver, "*", "*"

    if strategy in ("ios_xe", "nx_os", "junos"):
        # Prefer the (lower-cased) build if present; else compose from parts.
        ver = build.lower() if build else _join_version_parts(maj, minr, pat)
        return ver, "*", (edition or "*")

    if strategy == "fortios":
        if maj is not None and minr is not None and pat is not None:
            ver = f"{maj}.{minr}.{pat}"
        elif build:
            # Fallback if only build available: keep the text before any "+".
            ver = build.split("+")[0]
        else:
            ver = "*"
        return ver, "*", "*"

    # "vrp", "firmware" and every remaining strategy share one rule:
    # the build wins when present, otherwise the numeric parts are joined.
    ver = build if build else _join_version_parts(maj, minr, pat)
    return ver, "*", "*"
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _cpe_target_hw(arch: str | None) -> str:
    """Map an architecture name to the token used in the CPE ``target_hw`` slot.

    Returns ``"*"`` when *arch* is missing or blank.  Known spellings collapse
    to ``x64``, ``x86``, ``arm64`` or ``arm``; anything else is returned
    lower-cased and stripped.
    """
    if not arch:
        return "*"
    # Strip as well as lower-case, so padded input is not emitted verbatim.
    a = arch.strip().lower()
    if not a:
        return "*"
    if a in ("x86_64", "amd64", "x64"):
        return "x64"
    if a in ("x86", "i386", "i686"):
        return "x86"
    # "armv8" is treated as 64-bit here; it must be tested before the generic
    # "arm" prefix check below, which would otherwise report it as "arm".
    if a in ("arm64", "aarch64", "armv8"):
        return "arm64"
    if a.startswith("arm"):
        return "arm"
    return a
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def build_cpe23(p: OSData) -> str:
    """Assemble the ``cpe:2.3`` formatted string for an OSData instance.

    The part is always ``o``.  Vendor, product, version, update and edition
    come from the mapping helpers; language, sw_edition, target_sw and other
    are always ``*``; target_hw is derived from ``p.arch`` when present.
    """
    vendor, product, strategy = _map_vendor_product(p)
    version, update, edition = _fmt_version(p, strategy)
    target_hw = _cpe_target_hw(getattr(p, "arch", None))

    # Vendor/product are always lower-cased; version/update/edition keep
    # whatever case the formatting step produced.
    leading = [_escape(token) for token in (vendor.lower(), product.lower(), version, update, edition)]

    return ":".join(
        (
            "cpe:2.3",
            "o",  # part
            *leading,
            "*",  # language
            "*",  # sw_edition
            "*",  # target_sw
            _escape(target_hw),  # target_hw
            "*",  # other
        )
    )
|
os_normalizer/helpers.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Utility functions shared across the OS fingerprinting package."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .constants import ARCH_SYNONYMS
|
|
7
|
+
from .models import OSData
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def norm_arch(s: str | None) -> str | None:
    """Map an architecture string to its canonical name via ARCH_SYNONYMS.

    Falsy input yields ``None``.  Otherwise the value is stripped and
    lower-cased, and returned in that form when it has no synonym entry.
    """
    if s:
        key = s.strip().lower()
        return ARCH_SYNONYMS.get(key, key)
    return None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_semver_like(text: str) -> tuple[int | None, int | None, int | None]:
    """Pull the first ``major[.minor[.patch]]`` number group out of *text*.

    Returns ``(major, minor, patch)`` as integers, with ``None`` for each
    component that is absent.  All three are ``None`` when nothing matches.
    """
    found = re.search(r"\b(\d+)(?:\.(\d+))?(?:\.(\d+))?\b", text)
    if found is None:
        return None, None, None
    # Unmatched optional groups come back as None and stay None.
    major, minor, patch = (int(piece) if piece else None for piece in found.groups())
    return major, minor, patch
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def precision_from_parts(
    major: int | None,
    minor: int | None,
    patch: int | None,
    build: str | None,
) -> str:
    """Name the most specific version component that is available.

    A truthy *build* wins outright.  Otherwise the first of patch, minor and
    major that is not ``None`` decides; with none of them set the result is
    ``"product"``.
    """
    if build:
        return "build"
    # Ordered from most to least specific.
    for label, value in (("patch", patch), ("minor", minor), ("major", major)):
        if value is not None:
            return label
    return "product"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def canonical_key(p: OSData) -> str:
    """Build a deterministic ``vendor:product:version:edition:codename`` key.

    Text fields are lower-cased and replaced by ``-`` when unset.  The
    version is the dot-joined major/minor/patch numbers that are not
    ``None``, or ``-`` when all three are missing.
    """

    def _slot(value: str | None) -> str:
        # Unset text fields collapse to the "-" placeholder.
        return (value or "-").lower()

    vendor = _slot(p.vendor)
    product = _slot(p.product)
    numbers = (p.version_major, p.version_minor, p.version_patch)
    version = ".".join(str(n) for n in numbers if n is not None) or "-"
    return ":".join((vendor, product, version, _slot(p.edition), _slot(p.codename)))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Regex for extracting an architecture token from free-form text
|
|
64
|
+
ARCH_TEXT_RE = re.compile(
    r"\b(x86_64|amd64|x64|x86|i386|i686|arm64|aarch64|armv8|armv7l?|ppc64le)\b",
    re.IGNORECASE,
)


def extract_arch_from_text(text: str) -> str | None:
    """Find the first architecture token in free-form text.

    The matched token is lower-cased and translated through ARCH_SYNONYMS.
    Returns ``None`` when the text contains no recognised token.
    """
    hit = ARCH_TEXT_RE.search(text)
    if hit is None:
        return None
    token = hit.group(1).lower()
    return ARCH_SYNONYMS.get(token, token)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def parse_os_release(blob_text: str) -> dict[str, Any]:
    """Read ``KEY=value`` pairs from /etc/os-release style text.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Keys are stripped and upper-cased; values lose surrounding single or
    double quotes.  ``ID_LIKE`` is additionally split on spaces/commas into
    a list of lower-cased names.  Every other key maps to a string.
    """
    parsed: dict[str, Any] = {}
    for raw_line in blob_text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        # Split on the first "=" only, so values may contain "=" themselves.
        name, sep, value = entry.partition("=")
        if not sep:
            continue
        name = name.strip().upper()
        unquoted = value.strip("\"'")
        if name == "ID_LIKE":
            tokens = re.split(r"[ ,]+", unquoted.strip())
            parsed[name] = [tok.strip().lower() for tok in tokens if tok]
        else:
            parsed[name] = unquoted
    return parsed
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def update_confidence(p: OSData, precision: str) -> None:
    """Raise ``p.confidence`` to the floor associated with *precision*.

    The confidence is never lowered: the stored value is the larger of the
    current confidence and the floor.  Unrecognised precision labels use a
    floor of 0.5.
    """
    floors = {
        "build": 0.85,
        "patch": 0.80,
        "minor": 0.75,
        "major": 0.70,
        "product": 0.60,
    }
    floor = floors.get(precision, 0.5)
    current = p.confidence
    p.confidence = floor if floor > current else current
|
os_normalizer/models.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Data model definitions for OS fingerprinting."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field, fields as dataclass_fields
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class OSData:
    """Normalised description of one operating system observation."""

    # --- Identity ---------------------------------------------------------
    family: str | None = None  # windows, linux, macos, ios, android, bsd, network-os
    vendor: str | None = None  # Microsoft, Apple, Canonical, Cisco, Juniper, Fortinet, Huawei, Netgear…
    product: str | None = None  # Windows 11, Ubuntu, macOS, IOS XE, Junos, FortiOS, VRP, Firmware
    edition: str | None = None  # Pro/Enterprise/LTSC; universalk9/ipbase; etc.
    codename: str | None = None  # Sequoia; Ubuntu codename; Cisco train
    channel: str | None = None  # LTS/Beta/GA/R3-S3 etc.

    # --- Version ----------------------------------------------------------
    version_major: int | None = None
    version_minor: int | None = None
    version_patch: int | None = None
    version_build: str | None = None  # Windows build; network image tag

    # --- Kernel / image ---------------------------------------------------
    kernel_name: str | None = None
    kernel_version: str | None = None
    arch: str | None = None
    distro: str | None = None
    like_distros: list[str] = field(default_factory=list)
    pretty_name: str | None = None

    # --- Network device extras --------------------------------------------
    hw_model: str | None = None
    build_id: str | None = None

    # --- Meta ---------------------------------------------------------------
    precision: str = "unknown"  # family|product|major|minor|patch|build
    confidence: float = 0.0
    evidence: dict[str, Any] = field(default_factory=dict)

    # Canonical key for deduplication / indexing; excluded from equality.
    os_key: str | None = field(default=None, compare=False)

    def __str__(self) -> str:  # pragma: no cover - formatting helper
        """Render a one-line, human-readable summary of the populated fields."""
        # Headline: vendor + product, else pretty_name, else family.
        if self.vendor or self.product:
            headline = " ".join(filter(None, (self.vendor, self.product)))
        else:
            headline = self.pretty_name or self.family or "Unknown OS"
        parts: list[str] = [headline]

        # Dotted version (a patch is only shown together with a minor), then the build.
        version_bits: list[str] = []
        if self.version_major is not None:
            numbers = [self.version_major]
            if self.version_minor is not None:
                numbers.append(self.version_minor)
                if self.version_patch is not None:
                    numbers.append(self.version_patch)
            version_bits.append(".".join(map(str, numbers)))
        if self.version_build:
            version_bits.append(f"build {self.version_build}")
        if version_bits:
            parts.append(" ".join(version_bits))

        # Edition (e.g., Enterprise, LTSC)
        if self.edition:
            parts.append(self.edition)

        # Codename and/or channel share one pair of parentheses.
        labels = [label for label in (self.codename, self.channel) if label]
        if labels:
            parts.append("(" + ", ".join(labels) + ")")

        # Architecture
        if self.arch:
            parts.append(self.arch)

        # Kernel name/version
        kernel = [bit for bit in (self.kernel_name, self.kernel_version) if bit]
        if kernel:
            parts.append("[kernel: " + " ".join(kernel) + "]")

        # Hardware model (common for network devices)
        if self.hw_model:
            parts.append(f"[hw: {self.hw_model}]")

        # build_id is only shown when it adds something beyond version_build.
        if self.build_id and self.build_id != self.version_build:
            parts.append(f"[build: {self.build_id}]")

        # Trailing {precision:confidence}, or {confidence} alone.
        if self.precision and self.precision != "unknown":
            parts.append("{" + f"{self.precision}:{self.confidence:.2f}" + "}")
        elif self.confidence:
            parts.append("{" + f"{self.confidence:.2f}" + "}")

        return " ".join(parts)

    def __repr__(self) -> str:  # pragma: no cover - formatting helper
        """Wrap the ``__str__`` summary in ``OSData(...)``."""
        return "OSData(" + str(self) + ")"

    def full(self, none_str="<None>") -> str:  # pragma: no cover - formatting helper
        """List every field, one ``name : value`` line each, in declared order.

        Field names are left-aligned and padded to the longest name.  Fields
        that are ``None`` are printed as *none_str*; ``confidence`` is shown
        with two decimals; lists are comma-joined; dicts are shown as
        ``{key=repr(value), ...}`` with sorted keys.
        """

        def _render(name: str, value: Any) -> str:
            if value is None:
                return none_str
            if name == "confidence" and isinstance(value, (int, float)):
                return f"{float(value):.2f}"
            if isinstance(value, list):
                return ", ".join(str(item) for item in value)
            if isinstance(value, dict):
                # Shallow, compact dict repr with sorted keys for stability.
                return "{" + ", ".join(f"{key}={value[key]!r}" for key in sorted(value)) + "}"
            return str(value)

        rows = [(f.name, _render(f.name, getattr(self, f.name))) for f in dataclass_fields(self)]
        width = max((len(name) for name, _ in rows), default=0)
        return "\n".join(name.ljust(width) + " : " + text for name, text in rows)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
if __name__ == "__main__":
    # Manual smoke check: print the short and the full rendering of a sample record.
    sample_fields = {
        "family": "linux",
        "vendor": "Fedora Project",
        "product": "Fedora Linux",
        "version_major": 33,
        "kernel_name": "linux",
        "kernel_version": "5.4.0-70-generic",
        "distro": "fedora",
        "like_distros": [],
        "pretty_name": "Fedora Linux",
        "precision": "major",
        "confidence": 0.7,
        "evidence": {"hit": "linux"},
    }
    sample = OSData(**sample_fields)
    full_listing = sample.full(none_str="")
    print("Normal:", sample, "\nFull:", full_listing, sep="\n", end="\n\n")
|