htmlcmp 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: htmlcmp
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: Compare HTML files by rendered output
5
5
  Author: Andreas Stefl
6
6
  Maintainer-email: Andreas Stefl <stefl.andreas@gmail.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "htmlcmp"
3
- version = "2.0.0"
3
+ version = "2.1.0"
4
4
  description = "Compare HTML files by rendered output"
5
5
  classifiers = []
6
6
  authors = [
@@ -0,0 +1,367 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import os
5
+ import sys
6
+ import json
7
+ import logging
8
+ import argparse
9
+ import subprocess
10
+ from pathlib import Path
11
+ from functools import partial
12
+ from concurrent.futures import ThreadPoolExecutor, as_completed
13
+
14
+ from rich.console import Console
15
+ from rich.markup import escape
16
+ from rich.progress import (
17
+ Progress,
18
+ SpinnerColumn,
19
+ BarColumn,
20
+ TextColumn,
21
+ MofNCompleteColumn,
22
+ TimeElapsedColumn,
23
+ TimeRemainingColumn,
24
+ )
25
+
26
+ from htmlcmp.common import setup_logging
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class Task:
    """A single file to run through tidy / JSON validation.

    ``rel`` is the file's path relative to the root of the tree being
    walked and is what gets shown in logs and reports; ``path`` is the path
    actually handed to the tidy / validation functions.
    """

    def __init__(self, rel: Path, path: Path):
        self.rel = rel
        self.path = path

    def __repr__(self) -> str:
        # Readable representation for log lines and debugger sessions.
        return f"{type(self).__name__}(rel={self.rel!r}, path={self.path!r})"
37
+
38
+
39
class Failure:
    """A file that produced warnings or errors, or errored while tidying.

    ``rel`` is the file's path relative to the tidied root. ``kind`` is one
    of "warning" or "error". ``reason`` is a short one-line summary and
    ``detail`` holds the captured tidy / validator output, shown when
    details are requested.
    """

    def __init__(self, rel: Path, kind: str, reason: str, detail: str = ""):
        self.rel = rel
        self.kind = kind
        self.reason = reason
        self.detail = detail

    def __repr__(self) -> str:
        # ``detail`` can be many lines of tidy output, so it is left out to
        # keep the representation on a single readable line.
        return (
            f"{type(self).__name__}(rel={self.rel!r}, kind={self.kind!r}, "
            f"reason={self.reason!r})"
        )
51
+
52
+
53
def tidy_json(path: Path) -> tuple[int, str]:
    """Validate a JSON file.

    The file is always decoded as UTF-8 (the encoding JSON text is
    exchanged in) rather than with the platform's locale encoding, so the
    verdict does not depend on the machine the check runs on.

    Returns (status, detail); status 0 ok, 2 error. ``detail`` is empty on
    success and carries the parser / decoder message on failure.

    Raises ``TypeError`` if ``path`` is not a ``Path`` and
    ``FileNotFoundError`` if it is not an existing file.
    """
    if not isinstance(path, Path):
        raise TypeError("path must be a Path object")
    if not path.is_file():
        raise FileNotFoundError(f"{path} is not a file")

    try:
        with open(path, "r", encoding="utf-8") as f:
            json.load(f)
    except ValueError as exc:
        # Both json.JSONDecodeError and UnicodeDecodeError derive from
        # ValueError, so malformed syntax and non-UTF-8 bytes land here.
        return 2, f"invalid JSON: {exc}"
    return 0, ""
66
+
67
+
68
+ def tidy_html(path: Path, html_tidy_config: Path = None) -> tuple[int, str]:
69
+ """Run ``tidy`` on an HTML file.
70
+
71
+ Returns (status, detail); status 0 ok, 1 warning, 2 error, mirroring
72
+ tidy's own exit codes (0 / 1 / >1).
73
+ """
74
+ if not isinstance(path, Path):
75
+ raise TypeError("path must be a Path object")
76
+ if not path.is_file():
77
+ raise FileNotFoundError(f"{path} is not a file")
78
+ if html_tidy_config is not None and not isinstance(html_tidy_config, Path):
79
+ raise TypeError("html_tidy_config must be a Path object or None")
80
+ if html_tidy_config is not None and not html_tidy_config.is_file():
81
+ raise FileNotFoundError(f"{html_tidy_config} is not a file")
82
+
83
+ cmd = ["tidy"]
84
+ if html_tidy_config:
85
+ cmd.extend(["-config", str(html_tidy_config.resolve())])
86
+ cmd.append(str(path))
87
+ result = subprocess.run(
88
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
89
+ )
90
+ detail = result.stdout or ""
91
+ if result.returncode == 1:
92
+ return 1, detail
93
+ if result.returncode > 1:
94
+ return 2, detail
95
+ return 0, detail
96
+
97
+
98
def tidy_file(path: Path, html_tidy_config: Path = None) -> tuple[int, str]:
    """Check one file with the validator that matches its suffix.

    ``.json`` files go to ``tidy_json`` and ``.html`` files go to
    ``tidy_html`` (with ``html_tidy_config`` forwarded). Any other suffix is
    reported as a clean ``(0, "")`` result without touching the file.

    Raises ``TypeError`` if ``path`` is not a ``Path`` and
    ``FileNotFoundError`` if it is not an existing file.
    """
    if not isinstance(path, Path):
        raise TypeError("path must be a Path object")
    if not path.is_file():
        raise FileNotFoundError(f"{path} is not a file")

    suffix = path.suffix
    if suffix not in (".json", ".html"):
        # Not a tidyable file; treated as a no-op success.
        return 0, ""
    if suffix == ".json":
        return tidy_json(path)
    return tidy_html(path, html_tidy_config=html_tidy_config)
110
+
111
+
112
def tidyable_file(path: Path) -> bool:
    """Tell whether ``path`` has a suffix this tool knows how to check.

    Only the exact, case-sensitive suffixes ``.json`` and ``.html`` count.

    Raises ``TypeError`` if ``path`` is not a ``Path`` and
    ``FileNotFoundError`` if it is not an existing file.
    """
    if isinstance(path, Path):
        if path.is_file():
            suffix = path.suffix
            return suffix == ".json" or suffix == ".html"
        raise FileNotFoundError(f"{path} is not a file")
    raise TypeError("path must be a Path object")
119
+
120
+
121
def collect_tasks(path: Path, root: Path = None) -> list[Task]:
    """Gather every tidyable file below ``path`` as a list of ``Task``.

    Directory entries are visited in sorted order and sub-directories are
    descended into as they are met, so the result is depth-first. Each
    task's ``rel`` is relative to ``root``, which defaults to ``path`` on
    the outermost call.

    Raises ``TypeError`` if ``path`` is not a ``Path`` and
    ``NotADirectoryError`` if it is not a directory.
    """
    if not isinstance(path, Path):
        raise TypeError("path must be a Path object")
    if not path.is_dir():
        raise NotADirectoryError(f"{path} is not a directory")

    base = path if root is None else root

    collected: list[Task] = []
    for entry in sorted(path.iterdir()):
        if entry.is_file() and tidyable_file(entry):
            collected.append(Task(entry.relative_to(base), entry))
            continue
        if entry.is_dir():
            # Recurse with the same base so ``rel`` stays root-relative.
            collected += collect_tasks(entry, root=base)
    return collected
141
+
142
+
143
def run_task(task: Task, html_tidy_config: Path | None) -> tuple[int, str]:
    """Tidy the file behind ``task`` and hand back ``tidy_file``'s result.

    This is the callable submitted to the executor; it logs the task's
    relative path at debug level before doing the work.
    """
    logger.debug("Tidying %s", task.rel)
    outcome = tidy_file(task.path, html_tidy_config=html_tidy_config)
    return outcome
146
+
147
+
148
def make_executor(max_workers: int) -> ThreadPoolExecutor:
    """Build the thread pool used to tidy files, logging its size first."""
    logger.info("Creating executor with %d worker(s)", max_workers)
    pool = ThreadPoolExecutor(max_workers=max_workers)
    return pool
151
+
152
+
153
def _escape_annotation_message(value) -> str:
    """Escape text for the message part of a GitHub workflow command."""
    # ``%`` must be replaced first so the escapes added below survive.
    return (
        str(value)
        .replace("%", "%25")
        .replace("\r", "%0D")
        .replace("\n", "%0A")
    )


def _escape_annotation_property(value) -> str:
    """Escape text for a ``key=value`` property of a GitHub workflow command."""
    # Properties are comma-separated and terminated by ``::``, so ``,`` and
    # ``:`` need escaping on top of the message rules.
    return (
        _escape_annotation_message(value)
        .replace(":", "%3A")
        .replace(",", "%2C")
    )


def github_annotation(failure: "Failure") -> None:
    """Emit a GitHub Actions annotation so warnings/errors surface in the UI.

    Prints one ``::error`` or ``::warning`` workflow command to stdout,
    using ``failure.rel`` as the ``file`` property and ``failure.reason`` as
    the message. Both are escaped so that newlines, ``%``, ``,`` or ``:`` in
    a path or an exception message cannot break the single-line command.
    """
    # https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions
    level = "error" if failure.kind == "error" else "warning"
    file_prop = _escape_annotation_property(failure.rel)
    message = _escape_annotation_message(failure.reason)
    print(f"::{level} file={file_prop}::{message}")
158
+
159
+
160
def _style(kind: str) -> str:
    """Map a finding kind to its display colour: red for "error", else yellow."""
    if kind == "error":
        return "red"
    return "yellow"
162
+
163
+
164
def _print_failure(console: Console, failure: Failure, show_details: bool) -> None:
    """Print a one-line report for ``failure`` and, optionally, its detail.

    The line is coloured by kind and prefixed with a cross for errors or a
    triangle for warnings. The path and reason are escaped so they are not
    interpreted as console markup. When ``show_details`` is set and the
    failure has captured output, that output follows, dimmed.
    """
    tint = _style(failure.kind)
    glyph = "✘" if failure.kind == "error" else "▲"
    location = escape(str(failure.rel))
    reason = escape(failure.reason)
    console.print(f"[{tint}]{glyph}[/{tint}] {location} [{tint}]— {reason}[/{tint}]")

    if not (show_details and failure.detail):
        return
    captured = escape(failure.detail.rstrip())
    console.print(f"[dim]{captured}[/dim]")
173
+
174
+
175
def _resolve(task: Task, future, failures: list[Failure], github: bool):
    """Turn a finished future into a ``Failure``, or ``None`` for a clean file.

    An exception raised by the future is logged with its traceback and
    recorded as an "error" failure. Otherwise status 0 is clean, status 1 is
    a "warning" and any other status is an "error". Every failure is
    appended to ``failures`` and, when ``github`` is true, also emitted as a
    GitHub annotation before being returned.
    """
    try:
        status, detail = future.result()
    except Exception as exc:  # noqa: BLE001 - surface any tidy error as a failure
        logger.exception("Error tidying %s", task.rel)
        finding = Failure(task.rel, "error", f"error: {exc}")
    else:
        if status == 0:
            logger.debug("Clean: %s", task.rel)
            return None
        if status == 1:
            kind, reason, log_message = "warning", "has warnings", "Warnings: %s"
        else:
            kind, reason, log_message = "error", "has errors", "Errors: %s"
        logger.info(log_message, task.rel)
        finding = Failure(task.rel, kind, reason, detail)

    failures.append(finding)
    if github:
        github_annotation(finding)
    return finding
197
+
198
+
199
def _run_live(future_to_task, console, failures, github, show_details):
    """Drain the futures behind a transient live progress bar.

    The bar's description tracks the file most recently completed; only
    failures are printed as lasting lines above the bar.
    """
    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
    )
    with Progress(*columns, console=console, transient=True) as progress:
        bar = progress.add_task("tidying…", total=len(future_to_task))
        for finished in as_completed(future_to_task):
            current = future_to_task[finished]
            # Transient line shows what's flowing through; only failures persist.
            progress.update(bar, description=str(current.rel))
            finding = _resolve(current, finished, failures, github)
            if finding is not None:
                _print_failure(progress.console, finding, show_details)
            progress.advance(bar)
220
+
221
+
222
def _run_plain(future_to_task, console, failures, github, show_details):
    """Drain the futures without a live region (CI / non-TTY output).

    Failures are printed as they are found. A heartbeat line is printed
    roughly every twentieth of the way through, and once more at the end,
    so long runs still show they're alive.
    """
    total = len(future_to_task)
    step = max(1, total // 20)
    for done, finished in enumerate(as_completed(future_to_task), start=1):
        finding = _resolve(future_to_task[finished], finished, failures, github)
        if finding is not None:
            _print_failure(console, finding, show_details)
        if done == total or done % step == 0:
            console.print(f"[dim] … {done}/{total} tidied[/dim]")
236
+
237
+
238
def _print_summary(console, total: int, failures: list[Failure]) -> None:
    """Print the closing summary: a rule, the tallies and the findings.

    With no failures a single "all clean" line is printed. Otherwise the
    clean / warning / error counts are shown (zero warning or error counts
    are omitted), followed by each finding sorted by kind and then path.
    """
    console.rule("[bold]Summary")

    if not failures:
        console.print(f"[green]✓ All {total} file(s) clean.[/green]")
        return

    warning_count = len([item for item in failures if item.kind == "warning"])
    error_count = len([item for item in failures if item.kind == "error"])
    clean_count = total - warning_count - error_count

    segments = [f"[green]{clean_count} clean[/green]"]
    if warning_count:
        segments.append(f"[yellow]{warning_count} with warnings[/yellow]")
    if error_count:
        segments.append(f"[red]{error_count} with errors[/red]")
    console.print(", ".join(segments))

    console.print("\n[bold]Findings:[/bold]")
    ordered = sorted(failures, key=lambda item: (item.kind, str(item.rel)))
    for item in ordered:
        tint = _style(item.kind)
        console.print(
            f" [{tint}]{escape(str(item.rel))}[/{tint}] [dim]— {escape(item.reason)}[/dim]"
        )
262
+
263
+
264
def run(
    path: Path,
    *,
    html_tidy_config: Path | None,
    max_workers: int,
    console: Console,
    live: bool,
    github: bool,
    show_details: bool,
) -> int:
    """Tidy every tidyable file under ``path`` and report the outcome.

    Files are collected up front, checked on a thread pool of
    ``max_workers`` threads, and reported either through a live progress
    bar (``live``) or as plain lines. ``github`` additionally emits GitHub
    Actions annotations and ``show_details`` prints the captured tool
    output for each finding.

    Returns the exit status: 1 if any file had errors, otherwise 0.
    Warnings alone do not fail the run.
    """
    console.print(f"[bold]Tidying[/bold] {escape(str(path))}")

    tasks = collect_tasks(path)
    logger.info("Collected %d tidyable file(s)", len(tasks))

    total = len(tasks)
    if total == 0:
        console.print("[dim]No tidyable files found.[/dim]")
        _print_summary(console, total, [])
        return 0

    failures: list[Failure] = []
    executor = make_executor(max_workers)
    try:
        future_to_task = {
            executor.submit(run_task, t, html_tidy_config): t for t in tasks
        }
        if live:
            _run_live(future_to_task, console, failures, github, show_details)
        else:
            _run_plain(future_to_task, console, failures, github, show_details)
    finally:
        # On the normal path every future has already completed, so
        # cancelling is a no-op. If the loop above is interrupted (for
        # example by Ctrl-C), drop the work still queued instead of running
        # the whole backlog before the process can exit.
        executor.shutdown(wait=True, cancel_futures=True)

    _print_summary(console, total, failures)

    # Warnings do not fail the run; only errors do.
    return 1 if any(f.kind == "error" for f in failures) else 0
302
+
303
+
304
def main():
    """Command-line entry point for ``html-tidy``.

    Parses the arguments, configures logging, decides whether a live
    progress bar and GitHub annotations are appropriate for the current
    environment, and delegates to ``run``.

    Returns the process exit status: 2 when ``path`` is not a directory,
    otherwise whatever ``run`` returns. Invalid option values end the
    process through ``argparse``'s usual usage error.
    """
    parser = argparse.ArgumentParser(
        prog="html-tidy",
        description="Run HTML tidy / JSON validation over a directory tree.",
    )
    parser.add_argument("path", type=Path, help="Path to directory to tidy")
    parser.add_argument(
        "--html-tidy-config", type=Path, help="Path to tidy config file"
    )
    parser.add_argument(
        "-j",
        "--max-workers",
        type=int,
        default=1,
        help="Number of parallel tidy workers (default: 1)",
    )
    parser.add_argument(
        "--details",
        action="store_true",
        help="Print the full tidy / validator output for each finding",
    )
    parser.add_argument(
        "--no-progress",
        action="store_true",
        help="Disable the live progress bar (forced off when not a TTY / in CI)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v, -vv, -vvv)",
    )
    parser.add_argument("--log-file", type=Path, help="Path to log file")
    parser.add_argument(
        "--log-file-verbosity", type=int, help="Log file verbosity level"
    )
    args = parser.parse_args()

    # A thread pool cannot be created with fewer than one worker; report
    # that as a usage error instead of a traceback from deep inside ``run``.
    if args.max_workers < 1:
        parser.error("--max-workers must be at least 1")

    setup_logging(args.verbose, args.log_file, args.log_file_verbosity)

    if not args.path.is_dir():
        print(f"Argument must be a directory: {args.path}", file=sys.stderr)
        return 2

    console = Console()
    github = os.environ.get("GITHUB_ACTIONS") == "true"
    in_ci = bool(os.environ.get("CI"))
    # The live bar needs an interactive terminal and is pointless in CI logs.
    live = console.is_terminal and not in_ci and not args.no_progress
    show_details = args.details or args.verbose > 0

    return run(
        args.path,
        html_tidy_config=args.html_tidy_config,
        max_workers=args.max_workers,
        console=console,
        live=live,
        github=github,
        show_details=show_details,
    )
364
+
365
+
366
+ if __name__ == "__main__":
367
+ sys.exit(main())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: htmlcmp
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: Compare HTML files by rendered output
5
5
  Author: Andreas Stefl
6
6
  Maintainer-email: Andreas Stefl <stefl.andreas@gmail.com>
@@ -1,169 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- import sys
5
- import argparse
6
- import json
7
- import subprocess
8
- from pathlib import Path
9
-
10
- from htmlcmp.common import bcolors
11
-
12
-
13
- def tidy_json(path: Path, verbose: bool = False) -> int:
14
- if not isinstance(path, Path):
15
- raise TypeError("path must be a Path object")
16
- if not path.is_file():
17
- raise FileNotFoundError(f"{path} is not a file")
18
-
19
- try:
20
- with open(path, "r") as f:
21
- json.load(f)
22
- return 0
23
- except ValueError:
24
- print(f"{bcolors.FAIL}Error: {path} is not a valid JSON file{bcolors.ENDC}")
25
- return 1
26
-
27
-
28
- def tidy_html(path: Path, html_tidy_config: Path = None, verbose: bool = False) -> int:
29
- if not isinstance(path, Path):
30
- raise TypeError("path must be a Path object")
31
- if not path.is_file():
32
- raise FileNotFoundError(f"{path} is not a file")
33
- if html_tidy_config is not None and not isinstance(html_tidy_config, Path):
34
- raise TypeError("html_tidy_config must be a Path object or None")
35
- if html_tidy_config is not None and not html_tidy_config.is_file():
36
- raise FileNotFoundError(f"{html_tidy_config} is not a file")
37
-
38
- cmd = ["tidy"]
39
- if html_tidy_config:
40
- cmd.extend(["-config", str(html_tidy_config.resolve())])
41
- cmd.append(str(path))
42
- result = subprocess.run(
43
- cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
44
- )
45
- if result.stdout:
46
- if verbose and result.returncode == 0:
47
- print(result.stdout)
48
- elif verbose and result.returncode == 1:
49
- print(f"{bcolors.WARNING}Warning: {path} has warnings{bcolors.ENDC}")
50
- print(f"{bcolors.WARNING}{result.stdout}{bcolors.ENDC}")
51
- elif verbose or result.returncode > 1:
52
- print(f"{bcolors.FAIL}Error: {path} has errors{bcolors.ENDC}")
53
- print(f"{bcolors.FAIL}{result.stdout}{bcolors.ENDC}")
54
- if result.returncode == 1:
55
- return 1
56
- if result.returncode > 1:
57
- return 2
58
- return 0
59
-
60
-
61
- def tidy_file(path: Path, html_tidy_config: Path = None, verbose: bool = False) -> int:
62
- if not isinstance(path, Path):
63
- raise TypeError("path must be a Path object")
64
- if not path.is_file():
65
- raise FileNotFoundError(f"{path} is not a file")
66
-
67
- if path.suffix == ".json":
68
- return tidy_json(path, verbose=verbose)
69
- elif path.suffix == ".html":
70
- return tidy_html(path, html_tidy_config=html_tidy_config, verbose=verbose)
71
-
72
-
73
- def tidyable_file(path: Path) -> bool:
74
- if not isinstance(path, Path):
75
- raise TypeError("path must be a Path object")
76
- if not path.is_file():
77
- raise FileNotFoundError(f"{path} is not a file")
78
-
79
- if path.suffix == ".json":
80
- return True
81
- if path.suffix == ".html":
82
- return True
83
- return False
84
-
85
-
86
- def tidy_dir(
87
- path: Path,
88
- level: int = 0,
89
- prefix: str = "",
90
- html_tidy_config: Path = None,
91
- verbose: bool = False,
92
- ) -> dict[str, list[Path]]:
93
- if not isinstance(path, Path):
94
- raise TypeError("path must be a Path object")
95
- if not path.is_dir():
96
- raise NotADirectoryError(f"{path} is not a directory")
97
- if not isinstance(level, int) or level < 0:
98
- raise ValueError("level must be a non-negative integer")
99
- if not isinstance(prefix, str):
100
- raise TypeError("prefix must be a string")
101
- if html_tidy_config is not None and not isinstance(html_tidy_config, Path):
102
- raise TypeError("html_tidy_config must be a Path object or None")
103
- if html_tidy_config is not None and not html_tidy_config.is_file():
104
- raise FileNotFoundError(f"{html_tidy_config} is not a file")
105
-
106
- prefix_file = prefix + "├── "
107
- if level == 0:
108
- print(f"tidy dir {path}")
109
-
110
- result = {
111
- "warning": [],
112
- "error": [],
113
- }
114
-
115
- items = list(path.iterdir())
116
- files = sorted(p for p in items if p.is_file() and tidyable_file(p))
117
- dirs = sorted(p for p in items if p.is_dir())
118
-
119
- for filepath in files:
120
- filename = filepath.name
121
- tidy = tidy_file(filepath, html_tidy_config=html_tidy_config, verbose=verbose)
122
- if tidy == 0:
123
- print(f"{prefix_file}{bcolors.OKGREEN}{filename} ✓{bcolors.ENDC}")
124
- elif tidy == 1:
125
- print(f"{prefix_file}{bcolors.WARNING}{filename} ✓{bcolors.ENDC}")
126
- result["warning"].append(filepath)
127
- elif tidy > 1:
128
- print(f"{prefix_file}{bcolors.FAIL}{filename} ✘{bcolors.ENDC}")
129
- result["error"].append(filepath)
130
-
131
- for dirpath in dirs:
132
- print(prefix + "├── " + dirpath.name)
133
- subresult = tidy_dir(
134
- dirpath,
135
- level=level + 1,
136
- prefix=prefix + "│ ",
137
- html_tidy_config=html_tidy_config,
138
- verbose=verbose,
139
- )
140
- result["warning"].extend(subresult["warning"])
141
- result["error"].extend(subresult["error"])
142
-
143
- return result
144
-
145
-
146
- def main():
147
- parser = argparse.ArgumentParser()
148
- parser.add_argument("path", type=Path, help="Path to directory to tidy")
149
- parser.add_argument(
150
- "--html-tidy-config", type=Path, help="Path to tidy config file"
151
- )
152
- parser.add_argument(
153
- "--verbose",
154
- action="store_true",
155
- help="Print verbose output (warnings and errors)",
156
- )
157
- args = parser.parse_args()
158
-
159
- result = tidy_dir(
160
- args.path, html_tidy_config=args.html_tidy_config, verbose=args.verbose
161
- )
162
- if result["error"]:
163
- return 1
164
-
165
- return 0
166
-
167
-
168
- if __name__ == "__main__":
169
- sys.exit(main())
File without changes
File without changes
File without changes
File without changes