os-normalizer 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of os-normalizer might be problematic. Click here for more details.
- os_normalizer/__init__.py +10 -0
- os_normalizer/constants.py +87 -0
- os_normalizer/cpe.py +265 -0
- os_normalizer/helpers.py +107 -0
- os_normalizer/models.py +159 -0
- os_normalizer/os_normalizer.py +313 -0
- os_normalizer/parsers/__init__.py +16 -0
- os_normalizer/parsers/bsd.py +69 -0
- os_normalizer/parsers/linux.py +121 -0
- os_normalizer/parsers/macos.py +111 -0
- os_normalizer/parsers/mobile.py +37 -0
- os_normalizer/parsers/network/__init__.py +61 -0
- os_normalizer/parsers/network/cisco.py +96 -0
- os_normalizer/parsers/network/fortinet.py +56 -0
- os_normalizer/parsers/network/huawei.py +38 -0
- os_normalizer/parsers/network/juniper.py +42 -0
- os_normalizer/parsers/network/netgear.py +41 -0
- os_normalizer/parsers/windows.py +197 -0
- os_normalizer-0.3.2.dist-info/METADATA +172 -0
- os_normalizer-0.3.2.dist-info/RECORD +22 -0
- os_normalizer-0.3.2.dist-info/WHEEL +4 -0
- os_normalizer-0.3.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
from datetime import UTC, datetime
|
|
2
|
+
from typing import Any, Iterable
|
|
3
|
+
from dataclasses import replace, fields
|
|
4
|
+
import copy
|
|
5
|
+
|
|
6
|
+
from os_normalizer.helpers import extract_arch_from_text, precision_from_parts, update_confidence
|
|
7
|
+
from os_normalizer.cpe import build_cpe23
|
|
8
|
+
from os_normalizer.models import OSData
|
|
9
|
+
from os_normalizer.parsers.bsd import parse_bsd
|
|
10
|
+
from os_normalizer.parsers.linux import parse_linux
|
|
11
|
+
from os_normalizer.parsers.macos import parse_macos
|
|
12
|
+
from os_normalizer.parsers.mobile import parse_mobile
|
|
13
|
+
from os_normalizer.parsers.network import parse_network
|
|
14
|
+
from os_normalizer.parsers.windows import parse_windows
|
|
15
|
+
|
|
16
|
+
# Rank of each precision label; a larger number means a more specific result.
# Used as the first element of the (precision, confidence) comparison key in
# this module. Labels missing from the table are looked up with a default of 0.
PRECISION_ORDER = {
    "build": 6,
    "patch": 5,
    "minor": 4,
    "major": 3,
    "product": 2,
    "family": 1,
    "unknown": 0,
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ============================================================
|
|
28
|
+
# Family detection (orchestrator logic)
|
|
29
|
+
# ============================================================
|
|
30
|
+
def detect_family(text: str, data: dict[str, Any]) -> tuple[str | None, float, dict[str, Any]]:
|
|
31
|
+
t = text.lower()
|
|
32
|
+
ev = {}
|
|
33
|
+
# Obvious network signals first
|
|
34
|
+
if any(
|
|
35
|
+
x in t
|
|
36
|
+
for x in [
|
|
37
|
+
"cisco",
|
|
38
|
+
"nx-os",
|
|
39
|
+
"ios xe",
|
|
40
|
+
"ios-xe",
|
|
41
|
+
"junos",
|
|
42
|
+
"fortios",
|
|
43
|
+
"fortigate",
|
|
44
|
+
"huawei",
|
|
45
|
+
"vrp",
|
|
46
|
+
"netgear",
|
|
47
|
+
"firmware v",
|
|
48
|
+
]
|
|
49
|
+
):
|
|
50
|
+
# Special handling for 'ios' - if it's just 'ios' without 'cisco', treat as mobile, not network
|
|
51
|
+
if "ios " in t and "cisco" not in t:
|
|
52
|
+
ev["hit"] = "ios"
|
|
53
|
+
return "ios", 0.6, ev
|
|
54
|
+
|
|
55
|
+
ev["hit"] = "network-os"
|
|
56
|
+
return "network-os", 0.7, ev
|
|
57
|
+
# Linux
|
|
58
|
+
if "linux" in t or any(k in data for k in ("ID", "ID_LIKE", "PRETTY_NAME", "VERSION_ID", "VERSION_CODENAME")):
|
|
59
|
+
ev["hit"] = "linux"
|
|
60
|
+
return "linux", 0.6, ev
|
|
61
|
+
# Windows
|
|
62
|
+
if "windows" in t or "nt " in t or t.startswith("win") or data.get("os", "").lower() == "windows":
|
|
63
|
+
ev["hit"] = "windows"
|
|
64
|
+
return "windows", 0.6, ev
|
|
65
|
+
# Apple
|
|
66
|
+
if "macos" in t or "os x" in t or "darwin" in t:
|
|
67
|
+
ev["hit"] = "macos"
|
|
68
|
+
return "macos", 0.6, ev
|
|
69
|
+
if "ios" in t or "ipados" in t:
|
|
70
|
+
ev["hit"] = "ios"
|
|
71
|
+
return "ios", 0.6, ev
|
|
72
|
+
# Android
|
|
73
|
+
if "android" in t:
|
|
74
|
+
ev["hit"] = "android"
|
|
75
|
+
return "android", 0.6, ev
|
|
76
|
+
# BSD
|
|
77
|
+
if "freebsd" in t or "openbsd" in t or "netbsd" in t:
|
|
78
|
+
ev["hit"] = "bsd"
|
|
79
|
+
return "bsd", 0.6, ev
|
|
80
|
+
return None, 0.0, ev
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def normalize_os(text: str, data: dict | None = None) -> OSData:
    """Turn a raw OS description into a populated ``OSData`` record.

    The family is detected first, then the matching family parser fills in
    the details. The architecture falls back to whatever can be extracted
    from the text, and ``os_key`` is set from ``build_cpe23``.

    Args:
        text: Free-text OS description; surrounding whitespace is stripped.
        data: Optional structured hints passed through to the parsers.

    Returns:
        The resulting ``OSData``. ``precision`` is ``"unknown"`` when no
        family was detected.
    """
    raw = text.strip()
    hints = data or {}

    record = OSData()

    # Family detection
    family, family_conf, family_ev = detect_family(raw.lower(), hints)
    record.family = family
    record.confidence = max(record.confidence, family_conf)
    record.evidence.update(family_ev)

    # One parser per detected family; both mobile families share a parser.
    family_parsers = {
        "network-os": parse_network,
        "windows": parse_windows,
        "macos": parse_macos,
        "linux": parse_linux,
        "android": parse_mobile,
        "ios": parse_mobile,
        "bsd": parse_bsd,
    }
    parser = family_parsers.get(family)
    if parser is None:
        record.precision = "unknown"
    else:
        record = parser(raw, hints, record)

    # Fallback arch from text if not already set elsewhere
    if not record.arch:
        record.arch = extract_arch_from_text(raw)

    # Populate canonical os_key as CPE 2.3
    try:
        record.os_key = build_cpe23(record)
    except Exception:
        # Be resilient: leave unset on any unexpected error
        record.os_key = None

    return record
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def choose_best_fact(candidates: list[OSData]) -> OSData:
    """Return the candidate with the highest ``(precision rank, confidence)``.

    Args:
        candidates: Records to choose from.

    Returns:
        The best-scoring record. On a tie the earliest candidate in the input
        wins.

    Raises:
        ValueError: If ``candidates`` is empty.
    """
    if not candidates:
        raise ValueError("No candidates")
    # max() returns the first maximal item, which is the same element a stable
    # descending sort would put first, without sorting the whole list.
    return max(
        candidates,
        key=lambda c: (PRECISION_ORDER.get(c.precision, 0), c.confidence),
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ============================================================
|
|
136
|
+
# Merge/update APIs
|
|
137
|
+
# ============================================================
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _score(p: OSData) -> tuple[int, float]:
    """Return the ``(precision rank, confidence)`` comparison key for *p*.

    Precision labels missing from ``PRECISION_ORDER`` rank as 0.
    """
    rank = PRECISION_ORDER.get(p.precision, 0)
    return rank, p.confidence
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _union_unique(values: Iterable[str]) -> list[str]:
|
|
145
|
+
seen: set[str] = set()
|
|
146
|
+
out: list[str] = []
|
|
147
|
+
for v in values:
|
|
148
|
+
if v not in seen:
|
|
149
|
+
seen.add(v)
|
|
150
|
+
out.append(v)
|
|
151
|
+
return out
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def merge_os(a: OSData, b: OSData, policy: str = "auto") -> OSData:
    """Merge two OSData objects into a new one.

    Default policy 'auto' keeps the base object with higher (precision, confidence)
    and fills missing details from the other. Conflicts are recorded under
    evidence['conflicts'] and evidence['alt'].

    Args:
        a: First record; chosen as the base when its score is >= that of ``b``.
        b: Second record.
        policy: NOTE(review): accepted but never read in this function body.

    Returns:
        A new ``OSData`` created with ``dataclasses.replace`` from the base
        record, with precision, confidence and ``os_key`` recomputed.
    """
    # The higher-scoring record is the base; a tie keeps ``a`` as the base.
    base, other = (a, b) if _score(a) >= _score(b) else (b, a)
    # New instance carrying the base's field values (container fields are still
    # the same objects until they are reassigned below).
    r = replace(base)

    # Ensure evidence containers exist
    r.evidence = dict(r.evidence or {})
    conflicts = dict(r.evidence.get("conflicts", {}))
    alts = dict(r.evidence.get("alt", {}))

    def fill(field: str) -> None:
        """Copy ``field`` from ``other`` when unset on ``r``; record a conflict when both differ."""
        av = getattr(r, field)
        bv = getattr(other, field)
        if av is None and bv is not None:
            setattr(r, field, bv)
        elif bv is not None and av is not None and av != bv:
            # Conflict: keep current, store alternative
            alts[field] = bv
            conflicts[field] = [av, bv]

    # Identity & descriptive fields
    for f in (
        "family",
        "vendor",
        "product",
        "edition",
        "codename",
        "channel",
        "distro",
        "pretty_name",
        "kernel_name",
        "kernel_version",
        "arch",
        "hw_model",
        "build_id",
    ):
        fill(f)

    # kernel_version: prefer longer/more specific token when both present
    # (this overrides the "keep current" outcome of fill() for this one field;
    # the conflict entry recorded by fill() is left in place).
    if base.kernel_version and other.kernel_version and len(other.kernel_version) > len(base.kernel_version):
        r.kernel_version = other.kernel_version

    # like_distros union
    if other.like_distros:
        r.like_distros = _union_unique([*(r.like_distros or []), *other.like_distros])

    # Version components: fill missing; record conflicts if both present and differ
    def choose_versions(x: OSData, y: OSData) -> tuple[str | None, int | None, int | None, int | None]:
        """Pick each version part from ``x`` when set, else from ``y``; log differing pairs as conflicts."""
        # ``or`` treats any falsy build (e.g. "") as missing; the numeric parts
        # use ``is not None`` so that 0 counts as a real value.
        build = x.version_build or y.version_build
        maj = x.version_major if x.version_major is not None else y.version_major
        minr = x.version_minor if x.version_minor is not None else y.version_minor
        pat = x.version_patch if x.version_patch is not None else y.version_patch
        for name, xa, ya in (
            ("version_build", x.version_build, y.version_build),
            ("version_major", x.version_major, y.version_major),
            ("version_minor", x.version_minor, y.version_minor),
            ("version_patch", x.version_patch, y.version_patch),
        ):
            if xa is not None and ya is not None and xa != ya:
                conflicts[name] = [xa, ya]
        return build, maj, minr, pat

    vb, vmaj, vmin, vpat = choose_versions(base, other)
    r.version_build, r.version_major, r.version_minor, r.version_patch = vb, vmaj, vmin, vpat

    # Merge evidence (shallow)
    # NOTE(review): dict.update lets keys from ``other`` overwrite the base's
    # evidence entries (including "hit") - confirm this is intended.
    if other.evidence:
        r.evidence.update(other.evidence)

    # Precision & confidence: recompute based on version parts
    new_prec = precision_from_parts(r.version_major, r.version_minor, r.version_patch, r.version_build)
    # Without a product name, "product" precision is downgraded to family/unknown.
    if new_prec == "product" and not r.product:
        new_prec = "family" if r.family else "unknown"
    r.precision = new_prec
    r.confidence = max(a.confidence, b.confidence)
    update_confidence(r, r.precision)

    # Attach conflicts/alternates
    # (assigned after the evidence merge, so these replace any same-named keys
    # that came from ``other.evidence``)
    if conflicts:
        r.evidence["conflicts"] = conflicts
    if alts:
        r.evidence["alt"] = alts

    # Refresh CPE key
    try:
        r.os_key = build_cpe23(r)
    except Exception:
        # Best effort: leave the key unset if it cannot be built.
        r.os_key = None

    return r
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def update_os(
    existing: OSData, text: str | None = None, data: dict | None = None, policy: str = "auto", inplace: bool = False
) -> OSData:
    """Parse new input, merge it with ``existing``, and return the result.

    Args:
        existing: Record to merge into.
        text: New free-text description, if any.
        data: New structured hints, if any.
        policy: Forwarded to ``merge_os``.
        inplace: When true, copy every merged field back onto ``existing`` and
            return ``existing``; otherwise return the merged record.

    Returns:
        ``existing`` (mutated) when ``inplace`` is true, else the new merged
        ``OSData``.
    """
    # With neither text nor data there is nothing to parse, so merge an empty record.
    if text or data:
        incoming = normalize_os(text or "", data or {})
    else:
        incoming = OSData()

    merged = merge_os(existing, incoming, policy=policy)
    if not inplace:
        return merged

    # Copy every dataclass field across; common containers are shallow-copied.
    for spec in fields(OSData):
        value = getattr(merged, spec.name)
        if isinstance(value, (dict, list, set)):
            value = copy.copy(value)
        setattr(existing, spec.name, value)
    return existing
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
if __name__ == "__main__":
    # Demo: normalize a set of representative OS strings and print each result.
    # (The previous unused ``now`` timestamp local has been dropped.)
    samples = [
        {
            "text": "Windows NT 10.0 build 22631 Enterprise x64",
        },
        {
            "text": "Darwin 24.0.0; macOS Sequoia arm64",
        },
        {
            "text": "Linux host 5.15.0-122-generic x86_64",
            "data": {
                "os_release": 'NAME="Ubuntu"\nID=ubuntu\nVERSION_ID="22.04.4"\nVERSION_CODENAME=jammy\nPRETTY_NAME="Ubuntu 22.04.4 LTS"',
            },
        },
        {
            "text": "Cisco IOS XE Software, Version 17.9.4a (Amsterdam) C9300-24T, universalk9, c9300-universalk9.17.09.04a.SPA.bin",
        },
        {
            "text": "FortiGate-100F v7.2.7 build1600 (GA) FGT_7.2.7-build1600",
        },
        {
            "text": "Cisco Nexus Operating System (NX-OS) Software nxos.9.3.5.bin N9K-C93180YC-FX",
        },
        {
            "text": "Junos: 20.4R3-S3 jinstall-ex-4300-20.4R3-S3.tgz EX4300-48T",
        },
        {
            "text": "Huawei VRP V800R012C00SPC500 S5720-28X-SI-AC",
        },
        {
            "text": "NETGEAR Firmware V1.0.9.88_10.2.88 R7000",
        },
        {
            "text": "Darwin Mac-Studio.local 24.6.0 Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:40 PDT 2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T6041 arm64",
        },
    ]

    for s in samples:
        parsed = normalize_os(text=s["text"], data=s.get("data"))
        print("----", s["text"])
        print(parsed)
        print()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .windows import parse_windows
|
|
2
|
+
from .macos import parse_macos
|
|
3
|
+
from .linux import parse_linux
|
|
4
|
+
from .mobile import parse_mobile
|
|
5
|
+
from .bsd import parse_bsd
|
|
6
|
+
from .network import parse_network
|
|
7
|
+
|
|
8
|
+
# Public API of this package: the parser entry points imported above.
__all__ = [
    "parse_windows",
    "parse_macos",
    "parse_linux",
    "parse_mobile",
    "parse_bsd",
    "parse_network",
]
|
|
16
|
+
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""BSD specific parsing logic (refactored, variant + channel handling)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from os_normalizer.helpers import (
|
|
7
|
+
parse_semver_like,
|
|
8
|
+
precision_from_parts,
|
|
9
|
+
update_confidence,
|
|
10
|
+
)
|
|
11
|
+
from os_normalizer.models import OSData
|
|
12
|
+
|
|
13
|
+
# Whole-word, case-insensitive matchers for each BSD variant name.
FREEBSD_RE = re.compile(r"\bfreebsd\b", re.IGNORECASE)
OPENBSD_RE = re.compile(r"\bopenbsd\b", re.IGNORECASE)
NETBSD_RE = re.compile(r"\bnetbsd\b", re.IGNORECASE)

# Variant name followed by whitespace and a dotted version; groups 1-3 capture
# major, optional minor and optional patch.
VARIANT_VERSION_RE = re.compile(
    r"\b(?:freebsd|openbsd|netbsd)\b\s+(\d+)(?:\.(\d+))?(?:\.(\d+))?",
    re.IGNORECASE,
)
# Release-channel marker preceded by "-", "_" or whitespace; group 1 is the
# marker itself (RELEASE, STABLE, CURRENT, RC[n], BETA[n]).
BSD_CHANNEL_RE = re.compile(
    r"(?:[-_\s])(RELEASE|STABLE|CURRENT|RC\d*|BETA\d*|RC|BETA)\b",
    re.IGNORECASE,
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_bsd(text: str, data: dict[str, Any], p: OSData) -> OSData:
    """Fill *p* with BSD details parsed from *text*.

    Sets product, vendor and kernel name from the detected variant (FreeBSD,
    OpenBSD, NetBSD, or the generic "BSD"), then the version parts, precision
    and release channel.

    Args:
        text: Raw OS description.
        data: Structured hints; not read by this parser.
        p: Record to populate (mutated in place).

    Returns:
        The same ``p`` instance.
    """
    lowered = text.lower()

    # The first variant whose name appears as a whole word wins.
    variant = "BSD"
    for pattern, label in (
        (FREEBSD_RE, "FreeBSD"),
        (OPENBSD_RE, "OpenBSD"),
        (NETBSD_RE, "NetBSD"),
    ):
        if pattern.search(lowered):
            variant = label
            break

    p.product = variant
    p.vendor = variant
    p.kernel_name = variant.lower()

    # Prefer the variant-anchored version pattern; fall back to generic semver.
    major, minor, patch = _extract_version(text)
    p.version_major = major
    p.version_minor = minor
    p.version_patch = patch
    if major:
        p.precision = precision_from_parts(major, minor, patch, None)
    else:
        p.precision = "product"

    # Channel from explicit markers/suffixes
    channel_match = BSD_CHANNEL_RE.search(text)
    if channel_match is not None:
        p.channel = channel_match.group(1).upper()

    update_confidence(p, p.precision)
    return p
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_version(text: str) -> tuple[int | None, int | None, int | None]:
    """Return ``(major, minor, patch)`` parsed from *text*.

    A version that directly follows a BSD variant name is preferred; without
    one the result of ``parse_semver_like`` is returned.
    """
    anchored = VARIANT_VERSION_RE.search(text)
    if anchored is None:
        return parse_semver_like(text)

    major_text, minor_text, patch_text = anchored.groups()
    minor = int(minor_text) if minor_text else None
    patch = int(patch_text) if patch_text else None
    return int(major_text), minor, patch
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Linux specific parsing logic (refactored)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from os_normalizer.helpers import parse_os_release, update_confidence
|
|
7
|
+
from os_normalizer.models import OSData
|
|
8
|
+
|
|
9
|
+
# Regex patterns used only by the Linux parser
|
|
10
|
+
# "kernel" or "uname" as a whole word, then (lazily, on the same line) the first
# N.N[.N][-suffix] token; group 2 is the version string.
KERNEL_RE = re.compile(
    r"\b(kernel|uname)\b.*?\b(\d+\.\d+(?:\.\d+)?(?:-\S+)?)",
    re.IGNORECASE,
)
# "Linux" as a whole word, then (lazily, on the same line) the first
# N.N[.N][-suffix] token; group 1 is the version string.
LINUX_VER_FALLBACK_RE = re.compile(
    r"\bLinux\b[^\n]*?\b(\d+\.\d+(?:\.\d+)?(?:-[A-Za-z0-9._-]+)?)\b",
    re.IGNORECASE,
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_linux(text: str, data: dict[str, Any], p: OSData) -> OSData:
    """Fill *p* with Linux details from *text* and optional os-release data.

    Args:
        text: Raw OS description; scanned for a kernel version.
        data: Structured hints; ``data["os_release"]`` may be an os-release
            string or a mapping.
        p: Record to populate (mutated in place).

    Returns:
        The same ``p`` instance.
    """
    p.kernel_name = "linux"

    release_info = None
    if isinstance(data, dict):
        release_info = _coerce_os_release(data.get("os_release"))

    # 1) Kernel version extraction
    p.kernel_version = _extract_kernel_version(text)

    # 2) Apply os-release information when present; otherwise only the family is known
    if not release_info:
        p.product = p.product or "Linux"
        p.precision = "family"
    else:
        _apply_os_release(release_info, p)

    update_confidence(p, p.precision)
    return p
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _coerce_os_release(obj: Any) -> Optional[dict[str, Any]]:
|
|
41
|
+
if isinstance(obj, str):
|
|
42
|
+
return parse_os_release(obj)
|
|
43
|
+
if isinstance(obj, dict):
|
|
44
|
+
return {k.upper(): v for k, v in obj.items()}
|
|
45
|
+
return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _extract_kernel_version(text: str) -> Optional[str]:
    """Return the kernel version string found in *text*, or ``None``.

    The "kernel"/"uname" anchored pattern is tried first, then the generic
    "Linux <version>" pattern.
    """
    # Each pattern is paired with the index of its version capture group.
    for pattern, version_group in ((KERNEL_RE, 2), (LINUX_VER_FALLBACK_RE, 1)):
        found = pattern.search(text)
        if found:
            return found.group(version_group)
    return None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _apply_os_release(osrel: dict[str, Any], p: OSData) -> None:
    """Copy distro identity and version details from os-release data onto *p*.

    Args:
        osrel: os-release mapping with upper-case keys (``ID``, ``ID_LIKE``,
            ``NAME``, ``PRETTY_NAME``, ``VERSION_ID``, ``VERSION_CODENAME``).
        p: Record to populate (mutated in place). Sets distro, like_distros,
            pretty_name, version parts, codename, channel, vendor, product and
            precision.
    """
    distro_id = osrel.get("ID")
    if distro_id:
        p.distro = str(distro_id).lower()

    like = osrel.get("ID_LIKE")
    if like:
        if isinstance(like, list):
            p.like_distros = [s.lower() for s in like]
        else:
            # ID_LIKE is a space-separated list in os-release files, so a raw
            # string such as "rhel centos fedora" must be split into its ids
            # rather than stored as a single entry. Stray double quotes are
            # dropped the same way the NAME handling below drops them.
            p.like_distros = str(like).replace('"', "").lower().split()

    p.pretty_name = osrel.get("PRETTY_NAME") or osrel.get("NAME")

    _apply_version_id(osrel.get("VERSION_ID"), p)

    vcode = osrel.get("VERSION_CODENAME")
    if vcode:
        p.codename = str(vcode).title()

    if p.pretty_name and "LTS" in str(p.pretty_name).upper():
        p.channel = "LTS"

    p.vendor = _vendor_for_distro(p.distro) if p.distro else p.vendor

    # Product: NAME without double quotes when it is a non-empty string,
    # otherwise the distro id, otherwise the generic "Linux".
    name = osrel.get("NAME")
    fallback = p.distro or "Linux"
    if isinstance(name, str):
        p.product = (name or fallback).replace('"', "")
    else:
        p.product = fallback

    # Precision from version parts
    if p.version_patch is not None:
        p.precision = "patch"
    elif p.version_minor is not None:
        p.precision = "minor"
    elif p.version_major is not None:
        p.precision = "major"
    else:
        p.precision = "family"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _apply_version_id(vid: Any, p: OSData) -> None:
    """Set major/minor/patch on *p* from a dotted ``VERSION_ID`` value.

    Each of the first three dot-separated components is applied only if it
    consists solely of digits; a falsy *vid* leaves *p* untouched.
    """
    if not vid:
        return

    components = re.split(r"[.]+", str(vid))
    targets = ("version_major", "version_minor", "version_patch")
    # zip stops at the shorter sequence, so absent components are skipped.
    for attr, component in zip(targets, components):
        if component.isdigit():
            setattr(p, attr, int(component))
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _vendor_for_distro(distro: Optional[str]) -> Optional[str]:
|
|
107
|
+
vendor_by_distro = {
|
|
108
|
+
"ubuntu": "Canonical",
|
|
109
|
+
"debian": "Debian",
|
|
110
|
+
"rhel": "Red Hat",
|
|
111
|
+
"rocky": "Rocky",
|
|
112
|
+
"almalinux": "AlmaLinux",
|
|
113
|
+
"centos": "Red Hat",
|
|
114
|
+
"amzn": "Amazon",
|
|
115
|
+
"amazon": "Amazon",
|
|
116
|
+
"sles": "SUSE",
|
|
117
|
+
"opensuse": "SUSE",
|
|
118
|
+
"arch": "Arch",
|
|
119
|
+
"fedora": "Fedora Project",
|
|
120
|
+
}
|
|
121
|
+
return vendor_by_distro.get(distro) if distro else None
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""macOS specific parsing logic (refactored)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from os_normalizer.constants import MACOS_ALIASES, MACOS_DARWIN_MAP
|
|
7
|
+
from os_normalizer.helpers import update_confidence
|
|
8
|
+
from os_normalizer.models import OSData
|
|
9
|
+
|
|
10
|
+
# Regex patterns used only by the macOS parser
|
|
11
|
+
# "darwin" as a whole word, then (lazily, skipping non-digit characters on the
# same line) a dotted version; groups 1-3 are major, optional minor, optional patch.
DARWIN_RE = re.compile(
    r"\bdarwin\b[^\d\n]*?(\d+)(?:\.(\d+))?(?:\.(\d+))?\b",
    re.IGNORECASE,
)
# "macos" optionally followed by one whitespace character, then major[.minor].
MACOS_VER_FALLBACK_RE = re.compile(r"\bmacos\s?(\d+)(?:\.(\d+))?", re.IGNORECASE)

# Local precision order for simple comparisons
# (labels not listed here are looked up with a default rank of 0)
_PRECISION_ORDER = {"family": 0, "product": 1, "major": 2, "minor": 3, "patch": 4, "build": 5}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def parse_macos(text: str, data: dict[str, Any], p: OSData) -> OSData:
    """Fill *p* with macOS details parsed from *text*.

    Args:
        text: Raw OS description.
        data: Structured hints; not read by this parser.
        p: Record to populate (mutated in place).

    Returns:
        The same ``p`` instance.
    """
    lowered = text.lower()

    # Base identity: keep existing values, default to macOS / Apple.
    if not p.product:
        p.product = "macOS"
    if not p.vendor:
        p.vendor = "Apple"

    # Resolution steps, in order:
    _apply_alias_hint(lowered, p)  # 1) alias-based version hints
    _apply_darwin_mapping(text, p)  # 2) Darwin kernel -> macOS version/codename
    _apply_version_fallback(text, p)  # 3) "macOS <ver>" from the text
    _apply_codename_fallback(lowered, p)  # 4) codename found in the text

    # Confidence boost based on precision
    update_confidence(p, p.precision)
    return p
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _apply_alias_hint(tl: str, p: OSData) -> None:
    """Set the major version on *p* from any macOS alias present in *tl*.

    Every alias in ``MACOS_ALIASES`` is checked. An alias applies when its
    normalized form is two words and the second is all digits. Later matches
    overwrite earlier ones.
    """
    for alias, normalized in MACOS_ALIASES.items():
        if alias not in tl:
            continue
        tokens = normalized.split()
        if len(tokens) != 2 or not tokens[1].isdigit():
            continue
        p.version_major = int(tokens[1])
        p.precision = _max_precision(p.precision, "major")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _apply_darwin_mapping(t: str, p: OSData) -> None:
    """Derive kernel and macOS version details on *p* from a Darwin version in *t*.

    Always records the Darwin kernel name and version when one is found. If
    the Darwin major is in ``MACOS_DARWIN_MAP``, the mapped product, version
    and codename are applied as well.
    """
    found = DARWIN_RE.search(t)
    if found is None:
        return

    darwin_major = int(found.group(1))
    p.kernel_name = "darwin"
    # Join only the version components that were actually captured.
    p.kernel_version = ".".join(part for part in found.groups() if part)

    if darwin_major not in MACOS_DARWIN_MAP:
        return

    product, version, codename = MACOS_DARWIN_MAP[darwin_major]
    p.product = product
    if version.isdigit():
        # Mapped version is a bare major number.
        p.version_major = int(version)
        p.precision = _max_precision(p.precision, "major")
    else:
        # Mapped version is dotted: take major and minor, ignore the rest.
        major_text, minor_text, *_ = version.split(".")
        p.version_major = int(major_text)
        p.version_minor = int(minor_text)
        p.precision = _max_precision(p.precision, "minor")
    p.codename = codename
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _apply_version_fallback(t: str, p: OSData) -> None:
    """Parse "macOS <major>[.<minor>]" from *t* when *p* has no major version yet."""
    if p.version_major:
        return

    found = MACOS_VER_FALLBACK_RE.search(t)
    if found is None:
        return

    major_text, minor_text = found.groups()
    p.version_major = int(major_text)
    if minor_text:
        p.version_minor = int(minor_text)
    # Precision is raised to whichever level the text actually provided.
    p.precision = _max_precision(p.precision, "minor" if minor_text else "major")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _apply_codename_fallback(tl: str, p: OSData) -> None:
    """Set the codename on *p* from *tl* when it is still missing.

    The first ``MACOS_DARWIN_MAP`` entry whose codename appears in *tl* is
    used. Its mapped version also supplies the major version when the leading
    component is numeric.
    """
    if p.codename:
        return

    for _product, ver, code in MACOS_DARWIN_MAP.values():
        if code.lower() not in tl:
            continue
        p.codename = code
        # Provide at least the major version from the map: the leading
        # dot-separated component, which is the whole string when undotted.
        major_text = ver.split(".")[0] if isinstance(ver, str) else ""
        if major_text.isdigit():
            p.version_major = int(major_text)
            p.precision = _max_precision(p.precision, "major")
        break
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _max_precision(current: str, new_label: str) -> str:
    """Return *new_label* if it ranks strictly above *current*, else *current*.

    Labels missing from ``_PRECISION_ORDER`` rank as 0.
    """
    new_rank = _PRECISION_ORDER.get(new_label, 0)
    current_rank = _PRECISION_ORDER.get(current, 0)
    if new_rank > current_rank:
        return new_label
    return current
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Mobile device specific parsing logic."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from os_normalizer.helpers import (
|
|
6
|
+
parse_semver_like,
|
|
7
|
+
precision_from_parts,
|
|
8
|
+
update_confidence,
|
|
9
|
+
)
|
|
10
|
+
from os_normalizer.models import OSData
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def parse_mobile(text: str, data: dict[str, Any], p: OSData) -> OSData:
    """Fill *p* with iOS/iPadOS or Android details parsed from *text*.

    Args:
        text: Raw OS description; matched case-insensitively.
        data: Structured hints; not read by this parser.
        p: Record to populate (mutated in place).

    Returns:
        The same ``p`` instance.
    """
    lowered = text.lower()

    # Pick (product, vendor); iOS/iPadOS is checked before Android.
    if "ios" in lowered or "ipados" in lowered:
        identity = ("iOS/iPadOS", "Apple")
    elif "android" in lowered:
        identity = ("Android", "Google")
    else:
        # Default for unknown mobile OS
        identity = ("Mobile OS", None)
    p.product, p.vendor = identity

    # Extract version info using semver-like parsing
    major, minor, patch = parse_semver_like(lowered)
    p.version_major = major
    p.version_minor = minor
    p.version_patch = patch
    if major:
        p.precision = precision_from_parts(major, minor, patch, None)
    else:
        p.precision = "product"

    # Boost confidence based on precision
    update_confidence(p, p.precision)

    return p
|