os-normalizer 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of os-normalizer might be problematic. Click here for more details.

Files changed (33) hide show
  1. os_normalizer-0.4.1/.uv-cache/.gitignore +1 -0
  2. os_normalizer-0.4.1/.uv-cache/.lock +0 -0
  3. os_normalizer-0.4.1/.uv-cache/CACHEDIR.TAG +1 -0
  4. os_normalizer-0.4.1/.uv-cache/interpreter-v4/7e11d242fb84b9e8/939db8dea853eb17.msgpack +0 -0
  5. os_normalizer-0.4.1/.uv-cache/sdists-v9/.git +0 -0
  6. os_normalizer-0.4.1/.uv-cache/sdists-v9/.gitignore +0 -0
  7. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/CHANGELOG.md +25 -1
  8. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/PKG-INFO +8 -7
  9. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/README.md +7 -6
  10. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/constants.py +57 -28
  11. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/cpe.py +58 -11
  12. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/models.py +16 -0
  13. os_normalizer-0.4.1/os_normalizer/parsers/windows.py +274 -0
  14. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/pyproject.toml +1 -1
  15. os_normalizer-0.4.0/os_normalizer/parsers/windows.py +0 -246
  16. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/.gitignore +0 -0
  17. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/.python-version +0 -0
  18. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/LICENSE +0 -0
  19. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/RELEASING.md +0 -0
  20. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/__init__.py +0 -0
  21. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/helpers.py +0 -0
  22. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/os_normalizer.py +0 -0
  23. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/__init__.py +0 -0
  24. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/bsd.py +0 -0
  25. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/linux.py +0 -0
  26. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/macos.py +0 -0
  27. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/mobile.py +0 -0
  28. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/network/__init__.py +0 -0
  29. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/network/cisco.py +0 -0
  30. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/network/fortinet.py +0 -0
  31. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/network/huawei.py +0 -0
  32. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/network/juniper.py +0 -0
  33. {os_normalizer-0.4.0 → os_normalizer-0.4.1}/os_normalizer/parsers/network/netgear.py +0 -0
@@ -0,0 +1 @@
1
+ *
File without changes
@@ -0,0 +1 @@
1
+ Signature: 8a477f597d28d172789f06886806bc55
File without changes
File without changes
@@ -3,6 +3,26 @@
3
3
  All notable changes to this project are documented here.
4
4
  This file adheres to Keep a Changelog and Semantic Versioning.
5
5
 
6
+ ## [0.4.1] - 2025-09-25
7
+
8
+ - Added: Broadened Windows product aliases (e.g., Win11, Win2k) and recognized the macOS 26 codename `Tahoe`.
9
+ - Changed: Windows normalization now always fills NT version major/minor fields and treats marketing releases (21H2/24H2/etc.) as the kernel version for richer telemetry.
10
+ - Changed: Windows CPE generation derives release-channel tokens from build numbers so clients emit `windows_10_21h2`, `windows_11_24h2`, and similar slugs automatically.
11
+ - Fixed: Windows 11 and Server 2025 inputs no longer fall back to Windows 10 defaults, and legacy server builds keep their correct product names.
12
+ - Fixed: Windows `OSData` string formatting avoids duplicate fragments and skips redundant kernel numbers when printing friendly names.
13
+
14
+ ## [0.4.0] - 2025-09-22
15
+
16
+ - Changed: Windows parser now normalizes typoed `windws` tokens, infers server editions from `Windows 2008/2012/2003` strings, and derives build numbers from generic `6.x.yyyy` patterns.
17
+ - Fixed: Windows Server 2012 R2 and older NT-based servers are correctly identified when only kernel/build identifiers are present.
18
+ - Fixed: OSData now always captures `kernel_version`/`version_build` for telemetry-only inputs.
19
+
20
+ ## [0.3.4] - 2025-09-21
21
+
22
+ - Added: Extensive Windows kernel/build fixtures (e.g., `Windows 7601 6.1.7601 ...`) to lock in parsing of raw telemetry strings.
23
+ - Added: Coverage for Windows Server kernel identifiers to ensure server products are emitted with matching CPE metadata.
24
+ - Added: Regression tests for Redstone/Windows 10 marketing channels and mixed-case architecture tokens.
25
+
6
26
  ## [0.3.3] - 2025-09-21
7
27
 
8
28
  - Added: `tests/case_utils.py` to share parametrization helpers across suites.
@@ -40,8 +60,12 @@ This file adheres to Keep a Changelog and Semantic Versioning.
40
60
 
41
61
  - Initial release.
42
62
 
43
- [Unreleased]: https://github.com/johnscillieri/os-normalizer/compare/v0.3.3...HEAD
63
+ [Unreleased]: https://github.com/johnscillieri/os-normalizer/compare/v0.4.1...HEAD
64
+ [0.4.1]: https://github.com/johnscillieri/os-normalizer/compare/v0.4.0...v0.4.1
65
+ [0.4.0]: https://github.com/johnscillieri/os-normalizer/compare/v0.3.4...v0.4.0
66
+ [0.3.4]: https://github.com/johnscillieri/os-normalizer/compare/v0.3.3...v0.3.4
44
67
  [0.3.3]: https://github.com/johnscillieri/os-normalizer/compare/v0.3.2...v0.3.3
68
+ [0.3.2]: https://github.com/johnscillieri/os-normalizer/compare/v0.3.1...v0.3.2
45
69
  [0.3.1]: https://github.com/johnscillieri/os-normalizer/compare/v0.3.0...v0.3.1
46
70
  [0.3.0]: https://github.com/johnscillieri/os-normalizer/compare/v0.2.0...v0.3.0
47
71
  [0.2.0]: https://github.com/johnscillieri/os-normalizer/compare/v0.1.0...v0.2.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: os-normalizer
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Normalize raw OS strings/metadata into structured data (family, product, version, arch).
5
5
  Project-URL: Homepage, https://github.com/johnscillieri/os-normalizer
6
6
  Project-URL: Repository, https://github.com/johnscillieri/os-normalizer
@@ -49,7 +49,7 @@ A Python library for identifying and parsing operating system information from v
49
49
 
50
50
  ## Overview
51
51
 
52
- The OS Normalizer library parses raw operating system strings and JSON data to identify the OS family, version, architecture, and other details. It supports parsing of:
52
+ The OS Normalizer library parses raw operating system strings and supplemental data to identify the OS family, version, architecture, and other details. It supports parsing of:
53
53
 
54
54
  - Windows (NT builds, versions)
55
55
  - macOS (Darwin versions, codenames)
@@ -80,17 +80,18 @@ print(result.product) # Windows 11
80
80
  print(result.version_major) # 11
81
81
  ```
82
82
 
83
- ### Using Raw OS JSON Data
83
+ ### Using OS Release Data
84
84
 
85
85
  ```python
86
86
  from os_normalizer import normalize_os
87
87
 
88
- # Fingerprint with both raw string and JSON data
88
+ # Fingerprint with both raw string and contents of os-release file
89
89
  raw_os_string="Linux host 5.15.0-122-generic x86_64"
90
- raw_os_json={
91
- "os_release": 'NAME="Ubuntu"\nID=ubuntu\nVERSION_ID="22.04.4"\nVERSION_CODENAME=jammy\nPRETTY_NAME="Ubuntu 22.04.4 LTS"'}
90
+ os_release_data={
91
+ "os_release": 'NAME="Ubuntu"\nID=ubuntu\nVERSION_ID="22.04.4"\nVERSION_CODENAME=jammy\nPRETTY_NAME="Ubuntu 22.04.4 LTS"'
92
+ }
92
93
 
93
- result = normalize_os(raw_os_string, raw_os_json)
94
+ result = normalize_os(raw_os_string, os_release_data)
94
95
  print(result.family) # linux
95
96
  print(result.product) # Ubuntu
96
97
  print(result.codename) # Jammy
@@ -4,7 +4,7 @@ A Python library for identifying and parsing operating system information from v
4
4
 
5
5
  ## Overview
6
6
 
7
- The OS Normalizer library parses raw operating system strings and JSON data to identify the OS family, version, architecture, and other details. It supports parsing of:
7
+ The OS Normalizer library parses raw operating system strings and supplemental data to identify the OS family, version, architecture, and other details. It supports parsing of:
8
8
 
9
9
  - Windows (NT builds, versions)
10
10
  - macOS (Darwin versions, codenames)
@@ -35,17 +35,18 @@ print(result.product) # Windows 11
35
35
  print(result.version_major) # 11
36
36
  ```
37
37
 
38
- ### Using Raw OS JSON Data
38
+ ### Using OS Release Data
39
39
 
40
40
  ```python
41
41
  from os_normalizer import normalize_os
42
42
 
43
- # Fingerprint with both raw string and JSON data
43
+ # Fingerprint with both raw string and contents of os-release file
44
44
  raw_os_string="Linux host 5.15.0-122-generic x86_64"
45
- raw_os_json={
46
- "os_release": 'NAME="Ubuntu"\nID=ubuntu\nVERSION_ID="22.04.4"\nVERSION_CODENAME=jammy\nPRETTY_NAME="Ubuntu 22.04.4 LTS"'}
45
+ os_release_data={
46
+ "os_release": 'NAME="Ubuntu"\nID=ubuntu\nVERSION_ID="22.04.4"\nVERSION_CODENAME=jammy\nPRETTY_NAME="Ubuntu 22.04.4 LTS"'
47
+ }
47
48
 
48
- result = normalize_os(raw_os_string, raw_os_json)
49
+ result = normalize_os(raw_os_string, os_release_data)
49
50
  print(result.family) # linux
50
51
  print(result.product) # Ubuntu
51
52
  print(result.codename) # Jammy
@@ -25,10 +25,11 @@ ARCH_SYNONYMS = {
25
25
  WINDOWS_BUILD_MAP = [
26
26
  # NT era (pre-Windows 10)
27
27
  (1381, 1381, "Windows NT 4.0", "RTM"),
28
+ (1998, 1998, "Windows 98", "RTM"),
28
29
  (2195, 2195, "Windows 2000", "RTM"),
29
30
  (2600, 2600, "Windows XP", "RTM"),
30
- # NT 5.2 builds are ambiguous (XP x64 vs Server 2003); keep consistent label
31
- (3790, 3790, "Windows XP x64/Server 2003", "RTM"),
31
+ (3000, 3000, "Windows Me", "RTM"),
32
+ (3790, 3790, "Windows XP x64", "RTM"),
32
33
  # Vista/7/8/8.1
33
34
  (6000, 6000, "Windows Vista", "RTM"),
34
35
  (6001, 6001, "Windows Vista", "SP1"),
@@ -45,8 +46,13 @@ WINDOWS_BUILD_MAP = [
45
46
  (16299, 16299, "Windows 10", "1709"),
46
47
  (17134, 17134, "Windows 10", "1803"),
47
48
  (17763, 17763, "Windows 10", "1809"),
48
- (18362, 18363, "Windows 10", "1903/1909"),
49
- (19041, 19045, "Windows 10", "2004/20H2/21H1/21H2/22H2"),
49
+ (18362, 18362, "Windows 10", "1903"),
50
+ (18363, 18363, "Windows 10", "1909"),
51
+ (19041, 19041, "Windows 10", "2004"),
52
+ (19042, 19042, "Windows 10", "20H2"),
53
+ (19043, 19043, "Windows 10", "21H1"),
54
+ (19044, 19044, "Windows 10", "21H2"),
55
+ (19045, 19045, "Windows 10", "22H2"),
50
56
  # Windows 11
51
57
  (22000, 22000, "Windows 11", "21H2"),
52
58
  (22621, 22621, "Windows 11", "22H2"),
@@ -54,12 +60,35 @@ WINDOWS_BUILD_MAP = [
54
60
  (26100, 26199, "Windows 11", "24H2"),
55
61
  ]
56
62
 
63
+ # Windows Server build map (build number range -> product name, marketing channel)
64
+ # This is consulted only when the input looks server-like or when an explicit
65
+ # Windows Server product is already detected. Client mapping will not override
66
+ # explicit server detections.
67
+ WINDOWS_SERVER_BUILD_MAP = [
68
+ # Legacy server releases aligned with Vista/7/8/8.1
69
+ (3790, 3790, "Windows Server 2003", "RTM"),
70
+ (6001, 6001, "Windows Server 2008", "RTM"), # 6001 corresponds to 2008 RTM
71
+ (6002, 6002, "Windows Server 2008", "SP2"),
72
+ (7600, 7600, "Windows Server 2008 R2", "RTM"),
73
+ (7601, 7601, "Windows Server 2008 R2", "SP1"),
74
+ (9200, 9200, "Windows Server 2012", "RTM"),
75
+ (9600, 9600, "Windows Server 2012 R2", "RTM"),
76
+ # NT 10.0 based server releases
77
+ (14393, 14393, "Windows Server 2016", "1607"),
78
+ (17763, 17763, "Windows Server 2019", "1809"),
79
+ (20348, 20348, "Windows Server 2022", "21H2"),
80
+ # Windows Server 2025 (vNext) uses the 26100 train alongside client 24H2
81
+ (26100, 26199, "Windows Server 2025", "24H2"),
82
+ ]
83
+
57
84
  # Windows NT version tuple -> client product (ambiguous NT 6.x split out)
58
85
  WINDOWS_NT_CLIENT_MAP = {
59
86
  (4, 0): "Windows NT 4.0",
87
+ (4, 10): "Windows 98",
88
+ (4, 90): "Windows Me",
60
89
  (5, 0): "Windows 2000",
61
90
  (5, 1): "Windows XP",
62
- (5, 2): "Windows XP x64/Server 2003", # NT 5.2 often maps to XP x64 on client
91
+ (5, 2): "Windows XP x64",
63
92
  (6, 0): "Windows Vista",
64
93
  (6, 1): "Windows 7",
65
94
  (6, 2): "Windows 8",
@@ -71,7 +100,6 @@ WINDOWS_NT_CLIENT_MAP = {
71
100
  WINDOWS_NT_SERVER_MAP = {
72
101
  (4, 0): "Windows NT 4.0 Server",
73
102
  (5, 0): "Windows 2000 Server",
74
- (5, 1): "Windows XP/Server 2003", # rarely used for server detection
75
103
  (5, 2): "Windows Server 2003",
76
104
  (6, 0): "Windows Server 2008",
77
105
  (6, 1): "Windows Server 2008 R2",
@@ -80,10 +108,32 @@ WINDOWS_NT_SERVER_MAP = {
80
108
  # NT 10.0: Server 2016/2019/2022 detected via explicit names, not NT mapping
81
109
  }
82
110
 
111
+
112
# Ordered (product label, lowercase alias substrings) pairs.
# Consumers scan the list top to bottom and stop at the first alias found in
# the lowercased banner, so more specific entries must precede their prefixes
# (e.g. "windows 8.1" before "windows 8", every "... r2" before its base
# release, and the bare "win2k" alias last).
WINDOWS_PRODUCT_PATTERNS: list[tuple[str, tuple[str, ...]]] = [
    ("Windows 11", ("windows 11", "win11")),
    ("Windows 10", ("windows 10", "win10")),
    ("Windows 8.1", ("windows 8.1", "win81")),
    ("Windows 8", ("windows 8", "win8")),
    ("Windows 7", ("windows 7", "win7")),
    # "millenium" (single "n") is kept for inputs that carry the common
    # misspelling; the correctly spelled form is matched as well.
    ("Windows ME", ("windows me", "windows millennium", "windows millenium")),
    ("Windows 98", ("windows 98", "win98")),
    ("Windows Server 2025", ("windows server 2025", "windows 2025", "win2k25", "win2025")),
    ("Windows Server 2022", ("windows server 2022", "windows 2022", "win2k22", "win2022")),
    ("Windows Server 2019", ("windows server 2019", "windows 2019", "win2k19", "win2019")),
    ("Windows Server 2016", ("windows server 2016", "windows 2016", "win2k16", "win2016")),
    ("Windows Server 2012 R2", ("windows server 2012 r2", "windows 2012 r2", "win2k12r2", "win2012r2")),
    ("Windows Server 2012", ("windows server 2012", "windows 2012", "win2k12", "win2012")),
    ("Windows Server 2008 R2", ("windows server 2008 r2", "windows 2008 r2", "win2k8r2", "win2008r2")),
    ("Windows Server 2008", ("windows server 2008", "windows 2008", "win2k8", "win2008")),
    ("Windows Server 2003", ("windows server 2003", "windows 2003", "win2k3", "win2003")),
    ("Windows Server 2000", ("windows server 2000", "windows 2000", "win2k", "win2000")),
]
130
+
131
+
83
132
  # Human readable aliases (macOS codenames)
84
133
  MACOS_ALIASES = {
85
- "sonoma": "macOS 14",
134
+ "tahoe": "macOS 26",
86
135
  "sequoia": "macOS 15",
136
+ "sonoma": "macOS 14",
87
137
  "ventura": "macOS 13",
88
138
  "monterey": "macOS 12",
89
139
  "big sur": "macOS 11",
@@ -102,26 +152,5 @@ MACOS_DARWIN_MAP = {
102
152
  25: ("macOS", "26", "Tahoe"),
103
153
  }
104
154
 
105
- # Windows Server build map (build number range -> product name, marketing channel)
106
- # This is consulted only when the input looks server-like or when an explicit
107
- # Windows Server product is already detected. Client mapping will not override
108
- # explicit server detections.
109
- WINDOWS_SERVER_BUILD_MAP = [
110
- # Legacy server releases aligned with Vista/7/8/8.1
111
- (3790, 3790, "Windows Server 2003", "RTM"),
112
- (6001, 6001, "Windows Server 2008", "RTM"), # 6001 corresponds to 2008 RTM
113
- (6002, 6002, "Windows Server 2008", "SP2"),
114
- (7600, 7600, "Windows Server 2008 R2", "RTM"),
115
- (7601, 7601, "Windows Server 2008 R2", "SP1"),
116
- (9200, 9200, "Windows Server 2012", "RTM"),
117
- (9600, 9600, "Windows Server 2012 R2", "RTM"),
118
- # NT 10.0 based server releases
119
- (14393, 14393, "Windows Server 2016", "1607"),
120
- (17763, 17763, "Windows Server 2019", "1809"),
121
- (20348, 20348, "Windows Server 2022", "21H2"),
122
- # Windows Server 2025 (vNext) uses the 26100 train alongside client 24H2
123
- (26100, 26199, "Windows Server 2025", "24H2"),
124
- ]
125
-
126
155
  # Cisco train names (used for codename detection)
127
156
  CISCO_TRAIN_NAMES = {"Everest", "Fuji", "Gibraltar", "Amsterdam", "Denali"}
@@ -9,6 +9,8 @@ from __future__ import annotations
9
9
 
10
10
  from typing import TYPE_CHECKING
11
11
 
12
+ from os_normalizer.constants import WINDOWS_BUILD_MAP
13
+
12
14
  if TYPE_CHECKING:
13
15
  from .models import OSData
14
16
 
@@ -39,7 +41,7 @@ def _map_vendor_product(p: OSData) -> tuple[str, str, str]:
39
41
  """
40
42
  fam = (p.family or "").lower()
41
43
  vendor = (p.vendor or "").lower() if p.vendor else None
42
- product = (p.product or "").lower() if p.product else None
44
+ product = (p.product or "").lower() if p.product else ""
43
45
 
44
46
  # Windows
45
47
  if fam == "windows":
@@ -59,9 +61,10 @@ def _map_vendor_product(p: OSData) -> tuple[str, str, str]:
59
61
  "windows server 2022": "windows_server_2022",
60
62
  }
61
63
  base = prod_map.get(product, "windows")
62
- # For Windows 10/11, append channel (e.g., _23h2) when available
63
- if base in ("windows_10", "windows_11") and p.channel:
64
- base = f"{base}_{p.channel.lower()}"
64
+ if base in ("windows_10", "windows_11"):
65
+ token = _windows_channel_token(p)
66
+ if token:
67
+ base = f"{base}_{token}"
65
68
  return vtok, base, "windows"
66
69
 
67
70
  # macOS
@@ -130,18 +133,26 @@ def _fmt_version(p: OSData, strategy: str) -> tuple[str, str, str]:
130
133
  """Return (version, update, edition) strings per strategy."""
131
134
  maj, minr, pat = p.version_major, p.version_minor, p.version_patch
132
135
  build = p.version_build
133
- channel = (p.channel or "").lower() if p.channel else None
134
136
  edition = (p.edition or "").lower() if p.edition else None
135
137
 
136
138
  if strategy == "windows":
137
- # Product may include channel; version should reflect NT kernel + build when known
138
- if p.kernel_version:
139
- ver = p.kernel_version
139
+ patch = p.version_patch
140
+ if p.version_major is not None and p.version_minor is not None:
141
+ base = f"{p.version_major}.{p.version_minor}"
142
+ if p.version_build:
143
+ ver = f"{base}.{p.version_build}"
144
+ if patch not in (None, 0):
145
+ ver = f"{ver}.{patch}"
146
+ else:
147
+ ver = base
148
+ if patch not in (None, 0):
149
+ ver = f"{ver}.{patch}"
140
150
  elif p.version_build:
141
- nt = p.evidence.get("nt_version") if isinstance(p.evidence, dict) else None
142
- ver = f"{nt}.{p.version_build}" if nt else p.version_build
151
+ ver = p.version_build
152
+ if patch not in (None, 0):
153
+ ver = f"{ver}.{patch}"
143
154
  else:
144
- ver = "*"
155
+ ver = p.kernel_version or "*"
145
156
  return ver, "*", "*"
146
157
 
147
158
  if strategy == "ubuntu":
@@ -220,6 +231,42 @@ def _fmt_version(p: OSData, strategy: str) -> tuple[str, str, str]:
220
231
  return ver, "*", "*"
221
232
 
222
233
 
234
+ def _windows_channel_token(p: OSData) -> str | None:
235
+ known_tokens = {
236
+ "24h2",
237
+ "23h2",
238
+ "22h2",
239
+ "21h2",
240
+ "21h1",
241
+ "20h2",
242
+ "2004",
243
+ "1909",
244
+ "1903",
245
+ "1809",
246
+ "1803",
247
+ "1709",
248
+ "1703",
249
+ "1607",
250
+ "1511",
251
+ "1507",
252
+ }
253
+
254
+ kv = (p.kernel_version or "").lower()
255
+ if kv in known_tokens:
256
+ return kv
257
+
258
+ vb = p.version_build
259
+ if vb and vb.isdigit():
260
+ build = int(vb)
261
+ for lo, hi, _product, marketing in WINDOWS_BUILD_MAP:
262
+ if lo <= build <= hi and marketing:
263
+ token = marketing.split('/')[-1].lower()
264
+ if token:
265
+ return token
266
+
267
+ return None
268
+
269
+
223
270
  def _cpe_target_hw(arch: str | None) -> str:
224
271
  if not arch:
225
272
  return "*"
@@ -47,6 +47,9 @@ class OSData:
47
47
  def __str__(self) -> str: # pragma: no cover - formatting helper
48
48
  parts: list[str] = []
49
49
 
50
+ if self.family == "windows":
51
+ return _format_windows(self)
52
+
50
53
  # Prefer vendor + product; fallback to pretty_name; then family
51
54
  name_bits = [x for x in (self.vendor, self.product) if x]
52
55
  if name_bits:
@@ -141,6 +144,19 @@ class OSData:
141
144
  return "\n".join(lines)
142
145
 
143
146
 
147
+ def _format_windows(p: OSData) -> str:
148
+ parts = [
149
+ p.vendor,
150
+ p.product,
151
+ p.edition,
152
+ p.codename,
153
+ f"({p.kernel_version})" if p.kernel_version != f"{p.version_major}.{p.version_minor}" else "",
154
+ f"{p.version_major}.{p.version_minor}.{p.version_build}",
155
+ p.arch,
156
+ ]
157
+ return " ".join(part for part in parts if part)
158
+
159
+
144
160
  if __name__ == "__main__":
145
161
  x = OSData(
146
162
  family="linux",
@@ -0,0 +1,274 @@
1
+ """Windows specific parsing logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ from os_normalizer.constants import (
10
+ WINDOWS_BUILD_MAP,
11
+ WINDOWS_NT_CLIENT_MAP,
12
+ WINDOWS_NT_SERVER_MAP,
13
+ WINDOWS_PRODUCT_PATTERNS,
14
+ WINDOWS_SERVER_BUILD_MAP,
15
+ )
16
+ from os_normalizer.helpers import extract_arch_from_text, update_confidence
17
+
18
+ if TYPE_CHECKING:
19
+ from os_normalizer.models import OSData
20
+
21
+ VERSION_PATTERN = re.compile(r"\b(\d+)\.(\d+)\.(\d+)(?:\.(\d+))?\b")
22
+ NT_PATTERN = re.compile(r"\bnt\s*(\d+)(?:\.(\d+))?", re.IGNORECASE)
23
+ BUILD_PATTERN = re.compile(r"\bbuild\s*(?:number\s*)?(\d{3,5})\b", re.IGNORECASE)
24
+ SP_PATTERN = re.compile(r"\bsp(\d)\b", re.IGNORECASE)
25
+
26
+ EDITION_KEYWORDS: list[tuple[str, str]] = [
27
+ ("iot enterprise", "Enterprise"),
28
+ ("enterprise", "Enterprise"),
29
+ ("education", "Education"),
30
+ ("datacenter", "Datacenter"),
31
+ ("standard", "Standard"),
32
+ ("professional", "Professional"),
33
+ (" pro ", "Professional"),
34
+ (" home ", "Home"),
35
+ ]
36
+
37
+
38
@dataclass(frozen=True)
class ProductDefaults:
    """Immutable fallback version fields associated with a product label.

    Every field defaults to ``None``, meaning "no fallback available".
    """

    # Fallback numeric version components.
    version_major: int | None = None
    version_minor: int | None = None
    version_patch: int | None = None
    # Fallback build number, stored as text.
    version_build: str | None = None
    # Fallback kernel version string such as "6.1".
    kernel_version: str | None = None
45
+
46
+
47
+ PRODUCT_DEFAULTS: dict[str, ProductDefaults] = {
48
+ "Windows NT 4.0": ProductDefaults(4, 0, None, "1381", "4.0"),
49
+ "Windows 98": ProductDefaults(4, 10, None, "1998", "4.10"),
50
+ "Windows ME": ProductDefaults(4, 90, None, "3000", "4.9"),
51
+ "Windows 2000": ProductDefaults(5, 0, None, "2195", "5.0"),
52
+ "Windows XP": ProductDefaults(5, 1, None, "2600", "5.1"),
53
+ "Windows Vista": ProductDefaults(6, 0, None, "6000", "6.0"),
54
+ "Windows Vista SP1": ProductDefaults(6, 0, None, "6001", "6.0"),
55
+ "Windows Vista SP2": ProductDefaults(6, 0, None, "6002", "6.0"),
56
+ "Windows 7": ProductDefaults(6, 1, None, "7601", "6.1"),
57
+ "Windows 7 SP1": ProductDefaults(6, 1, None, "7601", "6.1"),
58
+ "Windows 7 SP2": ProductDefaults(6, 1, None, "7601", "6.1"),
59
+ "Windows 8": ProductDefaults(6, 2, None, "9200", "6.2"),
60
+ "Windows 8.1": ProductDefaults(6, 3, None, "9600", "6.3"),
61
+ "Windows 10": ProductDefaults(10, 0, None, None, None),
62
+ "Windows 11": ProductDefaults(10, 0, None, None, None),
63
+ "Windows Server 2003": ProductDefaults(5, 2, None, "3790", "5.2"),
64
+ "Windows Server 2008": ProductDefaults(6, 0, None, "6002", "6.0"),
65
+ "Windows Server 2008 R2": ProductDefaults(6, 1, None, "7600", "6.1"),
66
+ "Windows Server 2008 R2 SP1": ProductDefaults(6, 1, None, "7601", "6.1"),
67
+ "Windows Server 2012": ProductDefaults(6, 2, None, "9200", "6.2"),
68
+ "Windows Server 2012 R2": ProductDefaults(6, 3, None, "9600", "6.3"),
69
+ "Windows Server 2016": ProductDefaults(10, 0, None, None, None),
70
+ "Windows Server 2019": ProductDefaults(10, 0, None, None, None),
71
+ "Windows Server 2022": ProductDefaults(10, 0, None, None, None),
72
+ "Windows Server 2025": ProductDefaults(10, 0, None, None, None),
73
+ }
74
+
75
+
76
@dataclass
class VersionState:
    """Mutable scratch record of the NT/build facts parsed from one banner."""

    # NT major/minor numbers found in the text, if any.
    nt_major: int | None = None
    nt_minor: int | None = None
    # Build number as text.
    build: str | None = None
    # Fourth dotted component of a full version string, when present.
    patch: int | None = None
    # Marketing channel label taken from the build tables (e.g. "22H2").
    channel: str | None = None
    # True once any version detail was read directly from the banner.
    explicit: bool = False
86
+
87
+
88
def parse_windows(text: str, data: dict[str, Any], p: OSData) -> OSData:
    """Fill *p* with the Windows details found in *text* and return it.

    The banner is lowercased once and every helper works on that copy.
    ``data`` is accepted but not read here.
    """
    lowered = text.lower()

    # Fields that hold for every Windows record.
    p.vendor = "Microsoft"
    p.kernel_name = "nt"
    p.arch = extract_arch_from_text(lowered)
    p.edition = _detect_edition(lowered)

    # Product resolution: text aliases first, then build tables, then NT maps.
    named_product = _detect_product(lowered)
    looks_like_server = _initial_server_hint(lowered, named_product)
    version_state = _extract_version_state(lowered)
    named_product, looks_like_server = _apply_build_context(version_state, named_product, looks_like_server)
    p.product = _finalize_product_label(lowered, named_product, version_state, looks_like_server)

    # Version fields, kernel version, then precision/confidence bookkeeping.
    fallback = PRODUCT_DEFAULTS.get(p.product or "")
    _apply_version_numbers(p, fallback, version_state)
    _set_kernel_version(p, fallback, version_state)
    _finalize_precision_and_confidence(p, version_state)

    return p
109
+
110
+
111
def _detect_product(text: str) -> str | None:
    """Return the first product whose alias occurs in *text*, else ``None``.

    ``WINDOWS_PRODUCT_PATTERNS`` is scanned in its declared order.
    """
    return next(
        (
            name
            for name, aliases in WINDOWS_PRODUCT_PATTERNS
            if any(alias in text for alias in aliases)
        ),
        None,
    )
117
+
118
+
119
+ def _initial_server_hint(tl: str, product: str | None) -> bool:
120
+ """Return True when the banner or product implies a Windows Server build."""
121
+ return "server" in tl or (product is not None and "server" in product.lower())
122
+
123
+
124
def _extract_version_state(text: str) -> VersionState:
    """Gather NT version, build and patch details from the banner text.

    Sources are consulted in this order: a dotted ``a.b.c[.d]`` version, an
    ``NT x[.y]`` token, and finally a ``build NNNN`` label. Later sources
    only fill values that earlier ones left unset.
    """
    found = VersionState()

    dotted = _select_best_version(text)
    if dotted:
        found.nt_major, found.nt_minor, found.build, found.patch = dotted
        found.explicit = True

    nt_token = NT_PATTERN.search(text)
    if nt_token:
        major_txt, minor_txt = nt_token.group(1), nt_token.group(2)
        token_major = int(major_txt)
        token_minor = int(minor_txt) if minor_txt else 0
        if found.nt_major is None:
            found.nt_major, found.nt_minor = token_major, token_minor
        elif found.nt_minor is None:
            # Keep the dotted major; only backfill a missing minor.
            found.nt_minor = token_minor
        found.explicit = True

    if found.build is None:
        labelled = BUILD_PATTERN.search(text)
        if labelled:
            # int() round-trip drops any leading zeros.
            found.build = str(int(labelled.group(1)))
            found.explicit = True

    return found
151
+
152
+
153
def _apply_build_context(state: VersionState, product: str | None, server_hint: bool) -> tuple[str | None, bool]:
    """Infer product and channel from the build number and refine the server hint.

    Returns the (possibly updated) product and server hint. When the state
    has a numeric build, ``state.channel`` is set from the build tables as a
    side effect; otherwise nothing changes.
    """
    raw_build = state.build
    if not raw_build or not raw_build.isdigit():
        return product, server_hint

    inferred, channel, from_server_table = _lookup_build(int(raw_build), server_hint)
    if inferred and not product:
        # A product named in the text always wins over the build-table guess.
        product = inferred
        if from_server_table:
            server_hint = True
    state.channel = channel
    return product, server_hint
166
+
167
+
168
def _finalize_product_label(tl: str, product: str | None, state: VersionState, server_hint: bool) -> str | None:
    """Settle on the final product label for the banner.

    When no product is known yet, the NT major/minor pair is mapped to one.
    A Windows 7 label is replaced by ``Windows 7 SP<n>`` when the banner has
    an ``sp<n>`` token.
    """
    label = product
    if label is None and state.nt_major is not None and state.nt_minor is not None:
        label = _product_from_nt(state.nt_major, state.nt_minor, server_hint)

    if not label:
        return label

    service_pack = SP_PATTERN.search(tl)
    if service_pack and "windows 7" in label.lower():
        return f"Windows 7 SP{service_pack.group(1)}"
    return label
179
+
180
+
181
+ def _apply_version_numbers(p: OSData, defaults: ProductDefaults, state: VersionState) -> None:
182
+ """Move version components from the parse state into the OSData payload."""
183
+ p.version_major = state.nt_major if state.nt_major is not None else defaults.version_major
184
+ p.version_minor = state.nt_minor if state.nt_minor is not None else defaults.version_minor
185
+ p.version_patch = state.patch if state.patch is not None else defaults.version_patch
186
+ p.version_build = state.build if state.build is not None else defaults.version_build
187
+
188
+
189
+ def _set_kernel_version(p: OSData, defaults: ProductDefaults | None, state: VersionState) -> None:
190
+ """Populate kernel_version using explicit tokens, build channel, or defaults."""
191
+ kernel_version: str | None = None
192
+ if state.explicit and state.nt_major is not None:
193
+ if state.nt_major >= 10 and state.channel:
194
+ kernel_version = state.channel
195
+ elif state.nt_minor is not None:
196
+ kernel_version = f"{state.nt_major}.{state.nt_minor}"
197
+ if kernel_version is None and defaults and defaults.kernel_version:
198
+ kernel_version = defaults.kernel_version
199
+ if kernel_version:
200
+ p.kernel_version = kernel_version
201
+
202
+
203
def _finalize_precision_and_confidence(p: OSData, state: VersionState) -> None:
    """Set precision, record explicit NT evidence, then update confidence.

    For an explicit NT version, ``p.evidence["nt_version"]`` stores
    ``major.minor`` with the major capped at 10 and a missing minor shown
    as 0.
    """
    p.precision = _derive_precision(p.version_major, p.version_minor, p.version_patch, p.version_build)

    explicit_nt = state.explicit and state.nt_major is not None
    if explicit_nt:
        capped_major = min(10, state.nt_major)
        shown_minor = 0 if state.nt_minor is None else state.nt_minor
        p.evidence["nt_version"] = f"{capped_major}.{shown_minor}"

    update_confidence(p, p.precision)
213
+
214
+
215
def _detect_edition(tl: str) -> str | None:
    """Return the edition label for the first keyword found in *tl*.

    The short keywords "pro" and "home" must appear as whole words; every
    other keyword is matched as a plain substring. Keywords are tried in
    ``EDITION_KEYWORDS`` order; ``None`` means nothing matched.
    """
    for keyword, label in EDITION_KEYWORDS:
        bare = keyword.strip()
        if bare in {"pro", "home"}:
            matched = re.search(rf"\b{bare}\b", tl) is not None
        else:
            matched = keyword in tl
        if matched:
            return label
    return None
224
+
225
+
226
def _select_best_version(text: str) -> tuple[int, int, str | None, int | None] | None:
    """Pick the most detailed dotted version in *text*.

    A four-part match outranks a three-part one, and among equals the
    earliest match wins. Returns ``(major, minor, build, patch)`` with the
    build normalised through ``int`` and rendered as text, or ``None`` when
    nothing matches.
    """
    winner: tuple[int, int, str | None, int | None] | None = None
    winner_rank = -1
    for found in VERSION_PATTERN.finditer(text):
        major, minor, build, patch = found.groups()
        rank = 1 if patch is None else 2
        if rank <= winner_rank:
            continue
        winner_rank = rank
        winner = (
            int(major),
            int(minor),
            str(int(build)),
            None if patch is None else int(patch),
        )
    return winner
237
+
238
+
239
def _lookup_build(build_num: int, server_hint: bool) -> tuple[str | None, str | None, bool]:
    """Map a build number to ``(product, channel, is_server)``.

    The server table is searched first, but only when *server_hint* is set;
    the client table is always searched afterwards. The first row whose
    inclusive range contains *build_num* wins. ``(None, None, False)`` is
    returned when no row matches.
    """
    ordered_tables = (
        WINDOWS_SERVER_BUILD_MAP if server_hint else [],
        WINDOWS_BUILD_MAP,
    )
    for table in ordered_tables:
        for low, high, name, channel in table:
            if low <= build_num <= high:
                return name, channel, name.lower().startswith("windows server")
    return None, None, False
251
+
252
+
253
def _product_from_nt(major: int, minor: int, server_hint: bool) -> str | None:
    """Translate an NT ``(major, minor)`` pair into a product label.

    The server map is used only when *server_hint* is set and it has the
    pair; otherwise the client map is consulted, giving ``None`` for an
    unknown pair.
    """
    version = (major, minor)
    if not server_hint or version not in WINDOWS_NT_SERVER_MAP:
        return WINDOWS_NT_CLIENT_MAP.get(version)
    return WINDOWS_NT_SERVER_MAP[version]
258
+
259
+
260
+ def _derive_precision(
261
+ major: int | None,
262
+ minor: int | None,
263
+ patch: int | None,
264
+ build: str | None,
265
+ ) -> str:
266
+ if build:
267
+ return "build"
268
+ if patch is not None and patch != 0:
269
+ return "patch"
270
+ if minor is not None:
271
+ return "minor"
272
+ if major is not None:
273
+ return "major"
274
+ return "product"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "os-normalizer"
3
- version = "0.4.0"
3
+ version = "0.4.1"
4
4
  description = "Normalize raw OS strings/metadata into structured data (family, product, version, arch)."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -1,246 +0,0 @@
1
- """Windows specific parsing logic.
2
-
3
- Refactored for clarity: vendor/edition detection, NT mapping, and build
4
- mapping are handled by focused helpers. Behavior is preserved while
5
- avoiding ambiguous NT mappings when server signals are present.
6
- """
7
-
8
- import re
9
- from typing import Any, Optional
10
-
11
- from os_normalizer.constants import (
12
- WINDOWS_BUILD_MAP,
13
- WINDOWS_SERVER_BUILD_MAP,
14
- WINDOWS_NT_CLIENT_MAP,
15
- WINDOWS_NT_SERVER_MAP,
16
- )
17
- from os_normalizer.helpers import update_confidence
18
- from os_normalizer.models import OSData
19
-
20
- # Regex patterns used only by the Windows parser
21
# Edition keyword as a whole word, case-insensitive; group 1 is the keyword.
WIN_EDITION_RE = re.compile(
    r"\b(professional|pro|enterprise|home|education|ltsc|datacenter|standard)\b",
    re.IGNORECASE,
)
# Service-pack marker such as "SP1" or "SP 2"; group 1 is the number.
WIN_SP_RE = re.compile(r"\bSP\s?([0-9]+)\b", re.IGNORECASE)
# Explicit "build NNNN" token with 4-6 digits; group 1 is the build number.
WIN_BUILD_RE = re.compile(r"\bbuild\s?(\d{4,6})\b", re.IGNORECASE)
# "NT <major>.<minor>" token; groups 1 and 2 are major and minor.
WIN_NT_RE = re.compile(r"\bnt\s?(\d+)\.(\d+)\b", re.IGNORECASE)
# Dotted version starting with 10.0: 10.0.<build>[.<suffix>];
# group 3 is the build, group 4 the optional suffix.
WIN_FULL_NT_BUILD_RE = re.compile(r"\b(10)\.(0)\.(\d+)(?:\.(\d+))?\b")
# Any <major>.<minor>.<build>[.<suffix>] where the build has 3-6 digits.
WIN_GENERIC_VERSION_RE = re.compile(r"\b(\d+)\.(\d+)\.(\d{3,6})(?:\.(\d+))?\b")
# Release/channel token (e.g. 22H2, 1809) as a whole word, case-insensitive.
WIN_CHANNEL_RE = re.compile(
    r"\b(24H2|23H2|22H2|21H2|21H1|20H2|2004|1909|1903|1809|1803|1709|1703|1607|1511|1507)\b",
    re.IGNORECASE,
)
34
-
35
-
36
def parse_windows(text: str, data: dict[str, Any], p: OSData) -> OSData:
    """Fill *p* with Windows details extracted from *text* and return it.

    The helper steps run in a fixed order because later steps read fields
    written by earlier ones. *data* is accepted but not read here.
    """
    lowered = text.lower()
    p.kernel_name = "nt"

    # Product and edition come from free text unless already set.
    if not p.product:
        p.product = _detect_product_from_text(lowered)
    if not p.edition:
        p.edition = _detect_edition(text)

    # Service pack.
    _parse_service_pack(text, p)

    # NT version mapping, steered by server keywords in the text.
    looks_like_server = _is_server_like(lowered)
    _apply_nt_mapping(text, p, looks_like_server)

    # Full dotted kernel version plus channel token.
    _apply_full_kernel_and_channel(text, p)

    # Explicit "build NNNN" token and the build-to-product tables.
    _apply_build_mapping(text, p, looks_like_server)

    # Precision (and version_major where applicable).
    _finalize_precision_and_version(p)

    # Vendor and confidence.
    p.vendor = "Microsoft"
    update_confidence(p, p.precision)
    return p
64
-
65
-
66
- def _detect_product_from_text(t: str) -> str:
67
- # Normalize common typos before matching
68
- t = t.replace("windws", "windows")
69
-
70
- if "windows 11" in t or "win11" in t:
71
- return "Windows 11"
72
- if "windows 10" in t or "win10" in t:
73
- return "Windows 10"
74
- if "windows 8.1" in t or "win81" in t:
75
- return "Windows 8.1"
76
- if "windows 8" in t or "win8" in t:
77
- return "Windows 8"
78
- if "windows 7" in t or "win7" in t:
79
- return "Windows 7"
80
- if "windows me" in t or "windows millenium" in t:
81
- return "Windows ME"
82
- if "windows 98" in t or "win98" in t:
83
- return "Windows 98"
84
-
85
- # Server explicit names
86
- if "windows server 2012 r2" in t or "windows 2012 r2" in t or "win2k12r2" in t or "win2012r2" in t:
87
- return "Windows Server 2012 R2"
88
- if "windows server 2022" in t or "windows 2022" in t or "win2k22" in t or "win2022" in t:
89
- return "Windows Server 2022"
90
- if "windows server 2019" in t or "windows 2019" in t or "win2k19" in t or "win2019" in t:
91
- return "Windows Server 2019"
92
- if "windows server 2016" in t or "windows 2016" in t or "win2k16" in t or "win2016" in t:
93
- return "Windows Server 2016"
94
- if "windows server 2012" in t or "windows 2012" in t or "win2k12" in t or "win2012" in t:
95
- return "Windows Server 2012"
96
- if "windows server 2008 r2" in t or "windows 2008 r2" in t or "win2k8r2" in t or "win2008r2" in t:
97
- return "Windows Server 2008 R2"
98
- if "windows server 2008" in t or "windows 2008" in t or "win2k8" in t or "win2008" in t:
99
- return "Windows Server 2008"
100
- if "windows server 2003" in t or "windows 2003" in t or "win2k3" in t or "win2003" in t:
101
- return "Windows Server 2003"
102
- if "windows server 2000" in t or "windows 2000" in t or "win2k" in t or "win2000" in t:
103
- return "Windows Server 2000"
104
-
105
- if "windows" in t:
106
- return "Windows"
107
- return "Unknown"
108
-
109
-
110
def _detect_edition(text: str) -> str | None:
    """Return the normalized edition named in *text*, or ``None``.

    The first ``WIN_EDITION_RE`` match decides. "pro" and "professional"
    both become "Professional", "ltsc" becomes "LTSC", and every other
    keyword is returned title-cased.
    """
    found = WIN_EDITION_RE.search(text)
    if found is None:
        return None

    keyword = found.group(1).lower()
    if keyword in ("pro", "professional"):
        return "Professional"
    if keyword == "ltsc":
        return "LTSC"
    # Remaining keywords normalize to plain title case.
    return keyword.title()
126
-
127
-
128
def _parse_service_pack(text: str, p: OSData) -> None:
    """Record a service-pack marker from *text* on *p*, if one is present.

    The number goes to ``p.version_patch`` and the matched text is stored
    under ``p.evidence["service_pack"]``. Without a match *p* is untouched.
    """
    found = WIN_SP_RE.search(text)
    if found is None:
        return
    p.version_patch = int(found.group(1))
    p.evidence["service_pack"] = found.group(0)
133
-
134
-
135
- def _is_server_like(t: str) -> bool:
136
- return any(
137
- kw in t
138
- for kw in (
139
- "server",
140
- "datacenter",
141
- "standard",
142
- "essentials",
143
- "foundation",
144
- "core", # server core often appears
145
- "hyper-v",
146
- )
147
- )
148
-
149
-
150
def _apply_nt_mapping(text: str, p: OSData, server_like: bool) -> None:
    """Derive the product from an "NT x.y" token in *text*.

    The NT version is always recorded in ``p.evidence["nt_version"]`` when
    the token is present. The product is only replaced when it is unset or
    one of the generic placeholders ("Windows", "Windows 10/11"); the server
    table is used when *server_like* is true, the client table otherwise.
    """
    found = WIN_NT_RE.search(text)
    if found is None:
        return

    nt_key = (int(found.group(1)), int(found.group(2)))
    p.evidence["nt_version"] = f"{nt_key[0]}.{nt_key[1]}"

    # An explicit product (e.g. "Windows Server 2019") is left alone.
    product_is_generic = not p.product or p.product in ("Windows", "Windows 10/11")
    if not product_is_generic:
        return

    table = WINDOWS_NT_SERVER_MAP if server_like else WINDOWS_NT_CLIENT_MAP
    mapped = table.get(nt_key)
    if mapped:
        p.product = mapped
164
-
165
-
166
def _apply_build_mapping(text: str, p: OSData, server_like: bool) -> None:
    """Apply an explicit "build NNNN" token from *text* to *p*.

    Sets ``p.version_build`` (overwriting any earlier value), fills
    ``p.kernel_version`` if it is still empty, and then consults the server
    or client build table to refine ``p.product`` and ``p.channel``.
    Does nothing when *text* has no build token.

    NOTE(review): the nesting of the channel assignments and ``break``
    statements below was reconstructed from a listing without indentation;
    confirm against the original file.
    """
    m = WIN_BUILD_RE.search(text)
    if not m:
        return
    build_num = int(m.group(1))
    # Round-trip through int so the stored build has no leading zeros.
    p.version_build = str(build_num)

    # Kernel version string
    if not p.kernel_version:
        if (p.product == "Windows 10/11") or ("10.0" in text):
            p.kernel_version = f"10.0.{build_num}"
        else:
            # Otherwise prefix the build with an "NT x.y" token, if any.
            nt_mm = WIN_NT_RE.search(text)
            if nt_mm:
                maj, minr = int(nt_mm.group(1)), int(nt_mm.group(2))
                p.kernel_version = f"{maj}.{minr}.{build_num}"

    # Server path is taken when either the product name or the text says so.
    is_server_product = isinstance(p.product, str) and "server" in p.product.lower()
    if is_server_product or server_like:
        # Apply server build mapping; do not override explicit server product names
        for lo, hi, product_name, marketing in WINDOWS_SERVER_BUILD_MAP:
            if lo <= build_num <= hi:
                if not p.product or p.product in ("Windows", "Windows 10/11"):
                    p.product = product_name
                # Only set channel for modern Server (2016+)
                if build_num >= 14393:
                    p.channel = p.channel or marketing
                break
    else:
        # Apply client build mapping
        for lo, hi, product_name, marketing in WINDOWS_BUILD_MAP:
            if lo <= build_num <= hi:
                # Only use build map to set product for Windows 10/11 trains
                if build_num >= 10240:
                    p.product = product_name
                # An already-set channel is kept; the table value is a fallback.
                p.channel = p.channel or marketing
                break
203
-
204
-
205
def _apply_full_kernel_and_channel(text: str, p: OSData) -> None:
    """Extract a dotted kernel version and a channel token from *text*.

    A ``10.0.<build>[.<suffix>]`` version takes priority and always
    overwrites ``p.kernel_version``. A channel token (e.g. "22h2") is stored
    upper-cased only if ``p.channel`` is empty. If no kernel version is known
    after that, a generic ``<major>.<minor>.<build>[.<suffix>]`` match is
    used. ``p.version_build`` and ``p.evidence["nt_version"]`` are only
    filled when not already set.
    """
    # Full NT kernel version, e.g. 10.0.22621.2715
    full = WIN_FULL_NT_BUILD_RE.search(text)
    if full is not None:
        build_part, revision = full.group(3), full.group(4)
        if not p.version_build:
            p.version_build = build_part
        tail = f".{revision}" if revision else ""
        p.kernel_version = f"10.0.{build_part}{tail}"
        # Keep an NT version recorded by an earlier step, if any.
        p.evidence.setdefault("nt_version", "10.0")

    # Marketing channel token in free text, e.g. 22H2 (case-insensitive)
    token = WIN_CHANNEL_RE.search(text)
    if token is not None and not p.channel:
        p.channel = token.group(1).upper()

    if p.kernel_version:
        return

    # Fallback: any dotted version with a 3-6 digit build component.
    generic = WIN_GENERIC_VERSION_RE.search(text)
    if generic is None:
        return
    major, minor, build_part, revision = generic.groups()
    tail = f".{revision}" if revision else ""
    p.kernel_version = f"{major}.{minor}.{build_part}{tail}"
    if not p.version_build:
        p.version_build = build_part
    p.evidence.setdefault("nt_version", f"{major}.{minor}")
228
-
229
-
230
def _finalize_precision_and_version(p: OSData) -> None:
    """Set ``p.precision`` from the most specific information on *p*.

    Priority: a build gives "build"; a patch gives "patch"; a product name
    containing "7", "8", "10" or "11" gives "major" and also stores the
    first number in the name as ``p.version_major``; any other product gives
    "product"; no product at all gives "family".
    """
    if p.version_build:
        p.precision = "build"
    elif p.version_patch is not None:
        p.precision = "patch"
    elif p.product and any(marker in p.product for marker in ("7", "8", "10", "11")):
        # The membership test above guarantees at least one digit run exists.
        leading_number = re.search(r"\d+", p.product)
        if leading_number:
            p.version_major = int(leading_number.group(0))
        p.precision = "major"
    elif p.product:
        p.precision = "product"
    else:
        p.precision = "family"
File without changes
File without changes