napt 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,467 @@
1
+ # Copyright 2025 Roger Cibrian
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Web scraping discovery strategy for NAPT.
16
+
17
+ This is a VERSION-FIRST strategy that scrapes vendor download pages to find
18
+ download links and extract version information from those links. This enables
19
+ version discovery for vendors that don't provide APIs or static URLs.
20
+
21
+ Key Advantages:
22
+
23
+ - Discovers versions from vendor download pages
24
+ - Works for vendors without APIs or GitHub releases
25
+ - Version-first caching (can skip downloads when version unchanged)
26
+ - Supports both CSS selectors (recommended) and regex (fallback)
27
+ - No dependency on HTML structure stability (with good selectors)
28
+ - Handles relative and absolute URLs automatically
29
+
30
+ Supported Link Finding:
31
+
32
+ - CSS selectors: Modern, robust, recommended approach
33
+ - Regex patterns: Fallback for edge cases or when CSS won't work
34
+
35
+ Version Extraction:
36
+
37
+ - Extract version from the discovered download URL using regex
38
+ - Support for captured groups with formatting
39
+ - Transform version numbers (e.g., "2501" -> "25.01")
40
+
41
+ Use Cases:
42
+
43
+ - Vendors with download pages listing multiple versions (7-Zip, etc.)
44
+ - Legacy software without modern APIs
45
+ - Small vendors with simple download pages
46
+ - When GitHub releases and JSON APIs aren't available
47
+
48
+ Recipe Configuration:
49
+ ```yaml
50
+ source:
51
+ strategy: web_scrape
52
+ page_url: "https://www.7-zip.org/download.html"
53
+ link_selector: 'a[href$="-x64.msi"]' # CSS (recommended)
54
+ version_pattern: "7z(\\d{2})(\\d{2})-x64" # Extract from URL
55
+ version_format: "{0}.{1}" # Transform to "25.01"
56
+ ```
57
+
58
+ Alternative with regex:
59
+ ```yaml
60
+ source:
61
+ strategy: web_scrape
62
+ page_url: "https://vendor.com/downloads"
63
+ link_pattern: 'href="(/files/app-v[0-9.]+-x64\\.msi)"'
64
+ version_pattern: "app-v([0-9.]+)-x64"
65
+ ```
66
+
67
+ Configuration Fields:
68
+
69
+ - **page_url** (str, required): URL of the page to scrape for download links
70
+ - **link_selector** (str, optional): CSS selector to find download link.
71
+ Recommended approach. Example: 'a[href$=".msi"]' finds links ending with .msi
72
+ - **link_pattern** (str, optional): Regex pattern as fallback when CSS won't
73
+ work. Must have one capture group for the URL. Example: 'href="([^"]*\\.msi)"'
74
+ - **version_pattern** (str, required): Regex pattern to extract version from
75
+ the discovered URL. Use capture groups to extract version parts. Example:
76
+ "app-(\\d+\\.\\d+)" or "7z(\\d{2})(\\d{2})"
77
+ - **version_format** (str, optional): Python format string to combine captured
78
+ groups. Use {0}, {1}, etc. for groups. Example: "{0}.{1}" transforms
79
+ captures "25", "01" into "25.01". Defaults to "{0}" (first capture group
80
+ only).
81
+
82
+ Error Handling:
83
+
84
+ - ConfigError: Missing or invalid configuration fields
85
+ - NetworkError: Page download failures, selector/pattern not found
86
+ - Errors are chained with 'from err' for better debugging
87
+
88
+ Finding CSS Selectors:
89
+
90
+ Use browser DevTools:
91
+
92
+ 1. Open download page in Chrome/Edge/Firefox
93
+ 2. Right-click download link -> Inspect
94
+ 3. Right-click highlighted element -> Copy -> Copy selector
95
+ 4. Simplify selector (e.g., 'a[href$=".msi"]' instead of complex nth-child)
96
+
97
+ Common CSS Patterns:
98
+
99
+ - 'a[href$=".msi"]' - Links ending with .msi
100
+ - 'a[href*="x64"]' - Links containing "x64"
101
+ - 'a.download' - Links with class="download"
102
+ - 'a[href$="-x64.msi"]:first-of-type' - First matching link
103
+
104
+ Example:
105
+ In a recipe YAML:
106
+ ```yaml
107
+ apps:
108
+ - name: "7-Zip"
109
+ id: "napt-7zip"
110
+ source:
111
+ strategy: web_scrape
112
+ page_url: "https://www.7-zip.org/download.html"
113
+ link_selector: 'a[href$="-x64.msi"]'
114
+ version_pattern: "7z(\\d{2})(\\d{2})-x64"
115
+ version_format: "{0}.{1}"
116
+ ```
117
+
118
+ From Python (version-first approach):
119
+ ```python
120
+ from napt.discovery.web_scrape import WebScrapeStrategy
121
+ from napt.io import download_file
122
+
123
+ strategy = WebScrapeStrategy()
124
+ app_config = {
125
+ "source": {
126
+ "page_url": "https://www.7-zip.org/download.html",
127
+ "link_selector": 'a[href$="-x64.msi"]',
128
+ "version_pattern": "7z(\\d{2})(\\d{2})-x64",
129
+ "version_format": "{0}.{1}",
130
+ }
131
+ }
132
+
133
+ # Get version WITHOUT downloading installer
134
+ version_info = strategy.get_version_info(app_config)
135
+ print(f"Latest version: {version_info.version}")
136
+
137
+ # Download only if needed
138
+ if need_to_download:
139
+ file_path, sha256, headers = download_file(
140
+ version_info.download_url, Path("./downloads")
141
+ )
142
+ print(f"Downloaded to {file_path}")
143
+ ```
144
+
145
+ From Python (using core orchestration):
146
+ ```python
147
+ from pathlib import Path
148
+ from napt.core import discover_recipe
149
+
150
+ # Automatically uses version-first optimization
151
+ result = discover_recipe(Path("recipe.yaml"), Path("./downloads"))
152
+ print(f"Version {result.version} at {result.file_path}")
153
+ ```
154
+
155
+ Note:
156
+ - Version discovery via web scraping (no installer download required)
157
+ - Core orchestration automatically skips download if version unchanged
158
+ - CSS selectors are recommended (more robust than regex)
159
+ - Use browser DevTools to find selectors easily
160
+ - Selector should match exactly one link (first match is used)
161
+ - BeautifulSoup4 required for CSS selectors
162
+ - Regex fallback works without BeautifulSoup
163
+
164
+ """
165
+
166
+ from __future__ import annotations
167
+
168
+ import re
169
+ from typing import Any
170
+ from urllib.parse import urljoin
171
+
172
+ from bs4 import BeautifulSoup
173
+ import requests
174
+
175
+ from napt.exceptions import ConfigError, NetworkError
176
+ from napt.versioning.keys import VersionInfo
177
+
178
+ from .base import register_strategy
179
+
180
+
181
class WebScrapeStrategy:
    """Discovery strategy for web scraping download pages.

    Configuration example:
    ```yaml
    source:
      strategy: web_scrape
      page_url: "https://vendor.com/download.html"
      link_selector: 'a[href$=".msi"]'
      version_pattern: "app-v([0-9.]+)"
    ```
    """

    def get_version_info(
        self,
        app_config: dict[str, Any],
    ) -> VersionInfo:
        """Scrape download page for version and URL without downloading
        (version-first path).

        This method scrapes an HTML page, finds a download link using CSS selector
        or regex, extracts the version from that link, and returns version info.
        If the version matches cached state, the download can be skipped entirely.

        Args:
            app_config: App configuration containing source.page_url,
                source.link_selector or source.link_pattern, and
                source.version_pattern.

        Returns:
            Version info with version string, download URL, and
            source name.

        Raises:
            ConfigError: If required config fields are missing, invalid, or if
                selectors/patterns don't match anything.
            NetworkError: If page download fails (chained with 'from err').

        Example:
            Scrape 7-Zip download page:
            ```python
            strategy = WebScrapeStrategy()
            config = {
                "source": {
                    "page_url": "https://www.7-zip.org/download.html",
                    "link_selector": 'a[href$="-x64.msi"]',
                    "version_pattern": "7z(\\d{2})(\\d{2})-x64",
                    "version_format": "{0}.{1}"
                }
            }
            version_info = strategy.get_version_info(config)
            # version_info.version returns: '25.01'
            ```

        """
        # Imported lazily; presumably avoids a circular import at module
        # load time -- TODO confirm against napt.logging.
        from napt.logging import get_global_logger

        logger = get_global_logger()

        # --- Validate configuration --------------------------------------
        source = app_config.get("source", {})
        page_url = source.get("page_url")
        if not page_url:
            raise ConfigError(
                "web_scrape strategy requires 'source.page_url' in config"
            )

        link_selector = source.get("link_selector")
        link_pattern = source.get("link_pattern")

        if not link_selector and not link_pattern:
            raise ConfigError(
                "web_scrape strategy requires either 'source.link_selector' or "
                "'source.link_pattern' in config"
            )

        version_pattern = source.get("version_pattern")
        if not version_pattern:
            raise ConfigError(
                "web_scrape strategy requires 'source.version_pattern' in config"
            )

        # Default keeps only the first capture group when no format given.
        version_format = source.get("version_format", "{0}")

        logger.verbose("DISCOVERY", "Strategy: web_scrape (version-first)")
        logger.verbose("DISCOVERY", f"Page URL: {page_url}")
        if link_selector:
            logger.verbose("DISCOVERY", f"Link selector (CSS): {link_selector}")
        if link_pattern:
            logger.verbose("DISCOVERY", f"Link pattern (regex): {link_pattern}")
        logger.verbose("DISCOVERY", f"Version pattern: {version_pattern}")

        # --- Download the HTML page --------------------------------------
        logger.verbose("DISCOVERY", f"Fetching page: {page_url}")
        try:
            response = requests.get(page_url, timeout=30)
            response.raise_for_status()
        except requests.exceptions.HTTPError as err:
            # Read status from the response attached to the error rather than
            # the local variable, so this handler can never reference an
            # unbound local even if HTTPError originates inside requests.get.
            raise NetworkError(
                f"Failed to fetch page: {err.response.status_code} {err.response.reason}"
            ) from err
        except requests.exceptions.RequestException as err:
            raise NetworkError(f"Failed to fetch page: {err}") from err

        html_content = response.text
        logger.verbose("DISCOVERY", f"Page fetched ({len(html_content)} bytes)")

        # --- Find the download link (CSS selector preferred) -------------
        download_url = None

        if link_selector:
            # CSS selector path via BeautifulSoup4; first match wins.
            soup = BeautifulSoup(html_content, "html.parser")
            element = soup.select_one(link_selector)

            if not element:
                raise ConfigError(
                    f"CSS selector {link_selector!r} did not match any elements on page"
                )

            href = element.get("href")
            if not href:
                raise ConfigError(
                    f"Element matched by {link_selector!r} has no href attribute"
                )

            logger.verbose("DISCOVERY", f"Found link via CSS: {href}")

            # Resolve relative links against the page URL.
            download_url = urljoin(page_url, href)

        elif link_pattern:
            # Regex fallback. Keep the try body minimal: only re.compile can
            # raise re.error here, so compile alone is guarded.
            try:
                pattern = re.compile(link_pattern)
            except re.error as err:
                raise ConfigError(
                    f"Invalid link_pattern regex: {link_pattern!r}"
                ) from err

            match = pattern.search(html_content)
            if not match:
                raise ConfigError(
                    f"Regex pattern {link_pattern!r} did not match anything on page"
                )

            # First capture group if the pattern has one, else the full match.
            if pattern.groups > 0:
                href = match.group(1)
            else:
                href = match.group(0)

            logger.verbose("DISCOVERY", f"Found link via regex: {href}")

            download_url = urljoin(page_url, href)

        logger.verbose("DISCOVERY", f"Download URL: {download_url}")

        # --- Extract version from the download URL -----------------------
        try:
            version_regex = re.compile(version_pattern)
        except re.error as err:
            raise ConfigError(
                f"Invalid version_pattern regex: {version_pattern!r}"
            ) from err

        match = version_regex.search(download_url)
        if not match:
            raise ConfigError(
                f"Version pattern {version_pattern!r} did not match "
                f"URL {download_url!r}"
            )

        groups = match.groups()
        if not groups:
            # No capture groups: use the full match as the version string.
            version_str = match.group(0)
        else:
            # Combine captured groups via the configured format string,
            # e.g. "{0}.{1}" turns ("25", "01") into "25.01".
            try:
                version_str = version_format.format(*groups)
            except (IndexError, KeyError) as err:
                raise ConfigError(
                    f"version_format {version_format!r} failed with "
                    f"groups {groups}: {err}"
                ) from err

        logger.verbose("DISCOVERY", f"Extracted version: {version_str}")

        return VersionInfo(
            version=version_str,
            download_url=download_url,
            source="web_scrape",
        )

    def validate_config(self, app_config: dict[str, Any]) -> list[str]:
        """Validate web_scrape strategy configuration.

        Checks for required fields and correct types without making network calls.

        Args:
            app_config: The app configuration from the recipe.

        Returns:
            List of error messages (empty if valid).

        """
        errors: list[str] = []
        source = app_config.get("source", {})

        # page_url: required, non-empty string.
        if "page_url" not in source:
            errors.append("Missing required field: source.page_url")
        elif not isinstance(source["page_url"], str):
            errors.append("source.page_url must be a string")
        elif not source["page_url"].strip():
            errors.append("source.page_url cannot be empty")

        # At least one link-finding method must be configured.
        link_selector = source.get("link_selector")
        link_pattern = source.get("link_pattern")

        if not link_selector and not link_pattern:
            errors.append(
                "Missing required field: must provide either "
                "source.link_selector or source.link_pattern"
            )

        # link_selector: must be a parseable CSS selector if provided.
        if link_selector:
            if not isinstance(link_selector, str):
                errors.append("source.link_selector must be a string")
            elif not link_selector.strip():
                errors.append("source.link_selector cannot be empty")
            else:
                try:
                    # Run the selector against a trivial document purely to
                    # check syntax; soupsieve raises on invalid selectors.
                    soup = BeautifulSoup("<html></html>", "html.parser")
                    soup.select_one(link_selector)  # Will raise if invalid
                except Exception as err:
                    errors.append(f"Invalid CSS selector: {err}")

        # link_pattern: must be a compilable regex if provided.
        if link_pattern:
            if not isinstance(link_pattern, str):
                errors.append("source.link_pattern must be a string")
            elif not link_pattern.strip():
                errors.append("source.link_pattern cannot be empty")
            else:
                try:
                    re.compile(link_pattern)
                except re.error as err:
                    errors.append(f"Invalid link_pattern regex: {err}")

        # version_pattern: required, non-empty, compilable regex.
        if "version_pattern" not in source:
            errors.append("Missing required field: source.version_pattern")
        elif not isinstance(source["version_pattern"], str):
            errors.append("source.version_pattern must be a string")
        elif not source["version_pattern"].strip():
            errors.append("source.version_pattern cannot be empty")
        else:
            try:
                re.compile(source["version_pattern"])
            except re.error as err:
                errors.append(f"Invalid version_pattern regex: {err}")

        # version_format: optional, but must be a non-empty string if given.
        if "version_format" in source:
            if not isinstance(source["version_format"], str):
                errors.append("source.version_format must be a string")
            elif not source["version_format"].strip():
                errors.append("source.version_format cannot be empty")

        return errors
464
+
465
+
466
# Register this strategy under the name "web_scrape" when the module is
# imported, so recipes that declare `strategy: web_scrape` resolve to
# WebScrapeStrategy (registration mechanism comes from .base).
register_strategy("web_scrape", WebScrapeStrategy)
napt/exceptions.py ADDED
@@ -0,0 +1,149 @@
1
+ # Copyright 2025 Roger Cibrian
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Exception hierarchy for NAPT.
16
+
17
+ This module defines a custom exception hierarchy that allows library users
18
+ to distinguish between different types of errors. All exceptions inherit from
19
+ NAPTError, allowing users to catch all NAPT errors with a single except clause
20
+ if needed.
21
+
22
+ Example:
23
+ Catching specific error types:
24
+ ```python
25
+ from napt.core import discover_recipe
26
+ from napt.exceptions import ConfigError, NetworkError
27
+
28
+ try:
29
+ result = discover_recipe(Path("recipe.yaml"), Path("./downloads"))
30
+ except ConfigError as e:
31
+ print(f"Configuration error: {e}")
32
+ except NetworkError as e:
33
+ print(f"Network error: {e}")
34
+ ```
35
+
36
+ Catching all NAPT errors:
37
+ ```python
38
+ from napt.exceptions import NAPTError
39
+
40
+ try:
41
+ result = discover_recipe(Path("recipe.yaml"), Path("./downloads"))
42
+ except NAPTError as e:
43
+ print(f"NAPT error: {e}")
44
+ ```
45
+ """
46
+
47
+ from __future__ import annotations
48
+
49
# Public API of this module: the base class first, then its subclasses.
__all__ = [
    "NAPTError",
    "ConfigError",
    "NetworkError",
    "PackagingError",
]
55
+
56
+
57
class NAPTError(Exception):
    """Base exception for all NAPT errors.

    All NAPT-specific exceptions inherit from this class, allowing users
    to catch all NAPT errors with a single except clause if needed.
    """
    # No body needed: the docstring already satisfies the class body, so the
    # redundant `pass` statement is removed.
65
+
66
+
67
class ConfigError(NAPTError):
    """Raised for configuration-related errors.

    This exception is raised when there are problems with:

    - YAML parse errors (syntax errors, invalid structure)
    - Missing required configuration fields (e.g., no apps defined, missing
      'source.strategy' field)
    - Invalid strategy configuration (unknown strategy name, invalid strategy
      parameters)
    - Missing recipe files (file not found)
    - Recipe validation failures (invalid recipe structure, missing required
      app fields)

    Example:
        Catching configuration errors:
        ```python
        from napt.exceptions import ConfigError

        try:
            config = load_effective_config(Path("invalid.yaml"))
        except ConfigError as e:
            print(f"Config error: {e}")
        ```
    """
    # Docstring serves as the class body; redundant `pass` removed.
94
+
95
+
96
class NetworkError(NAPTError):
    """Raised for network/download-related errors.

    This exception is raised when there are problems with:

    - Download failures (HTTP errors, connection timeouts, network
      unreachable)
    - API call failures (GitHub API errors, JSON API endpoint failures,
      authentication issues)
    - Network-related version extraction errors (API response parsing
      failures)

    Example:
        Catching network errors:
        ```python
        from napt.exceptions import NetworkError

        try:
            result = discover_recipe(Path("recipe.yaml"), Path("./downloads"))
        except NetworkError as e:
            print(f"Network error: {e}")
        ```
    """
    # Docstring serves as the class body; redundant `pass` removed.
121
+
122
+
123
class PackagingError(NAPTError):
    """Raised for packaging/build-related errors.

    This exception is raised when there are problems with:

    - Build failures (PSADT template processing errors, file operations,
      directory creation failures)
    - Missing build tools (IntuneWinAppUtil.exe not found, PSADT template
      missing)
    - MSI extraction errors (failed to read MSI ProductVersion, unsupported
      MSI format)
    - Packaging operations (IntuneWinAppUtil.exe execution failures, invalid
      build directory structure)

    Example:
        Catching packaging errors:
        ```python
        from napt.exceptions import PackagingError

        try:
            build_package(Path("recipe.yaml"), Path("./builds"))
        except PackagingError as e:
            print(f"Packaging error: {e}")
        ```
    """
    # Docstring serves as the class body; redundant `pass` removed.
napt/io/__init__.py ADDED
@@ -0,0 +1,42 @@
1
+ # Copyright 2025 Roger Cibrian
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Input/Output operations for NAPT.
16
+
17
+ This module provides robust file download and upload capabilities with
18
+ features like conditional requests, retry logic, atomic writes, and
19
+ integrity verification.
20
+
21
+ Modules:
22
+ download - HTTP(S) file download with retries, conditional requests, and checksums.
23
+ upload - File upload adapters for Intune and storage providers (planned).
24
+
25
+ Example:
26
+ Basic usage:
27
+ ```python
28
+ from pathlib import Path
29
+ from napt.io import download_file
30
+
31
+ file_path, sha256, headers = download_file(
32
+ url="https://example.com/installer.msi",
33
+ destination_folder=Path("./downloads"),
34
+ )
35
+ print(f"Downloaded to {file_path} with hash {sha256}")
36
+ ```
37
+
38
+ """
39
+
40
from .download import NotModifiedError, download_file, make_session

# Names re-exported as the public napt.io API surface.
__all__ = ["download_file", "NotModifiedError", "make_session"]