litscan 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
litscan-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: litscan
3
+ Version: 1.0.0
4
+ Summary: A small CLI tool that scans a codebase for string and numeric literals, helping you quickly spot hard-coded values in source files.
5
+ Author: Ron Webb
6
+ Author-email: ron@ronella.xyz
7
+ Requires-Python: >=3.14
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.14
10
+ Requires-Dist: click (>=8.0.0,<9.0.0)
11
+ Requires-Dist: rich (>=15.0.0,<16.0.0)
@@ -0,0 +1,7 @@
1
+ """litscan package.
2
+
3
+ Author: Ron Webb
4
+ Since: 1.0.0
5
+ """
6
+
7
+ __version__ = "1.0.0"
@@ -0,0 +1,264 @@
1
+ """Command-line interface for litscan.
2
+
3
+ Author: Ron Webb
4
+ Since: 1.0.0
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import concurrent.futures
10
+ import os
11
+ import tempfile
12
+ import uuid
13
+ from pathlib import Path
14
+
15
+ import click
16
+ from rich.console import Console
17
+ from rich.progress import (
18
+ BarColumn,
19
+ MofNCompleteColumn,
20
+ Progress,
21
+ SpinnerColumn,
22
+ TextColumn,
23
+ TimeElapsedColumn,
24
+ )
25
+
26
+ from . import __version__
27
+ from .reporter import write_outputs
28
+ from .scanner import scan_file
29
+ from .store import SessionStore
30
+ from .util import setup_logger
31
+
32
+ _VALID_FORMATS = ("json", "html", "all")
33
+ _APP_NAME = "litscan"
34
+ _console = Console(stderr=True)
35
+ _logger = setup_logger(__name__)
36
+
37
+
38
+ def _parse_extensions(raw: str) -> list[str]:
39
+ """Parse a comma-separated extension string into a normalised list.
40
+
41
+ Each entry is lowercased and prefixed with a dot when absent.
42
+ Example: ``"py,js, TS"`` → ``['.py', '.js', '.ts']``
43
+
44
+ Author: Ron Webb
45
+ Since: 1.0.0
46
+ """
47
+ result: list[str] = []
48
+ for part in raw.split(","):
49
+ ext = part.strip().lower()
50
+ if ext and not ext.startswith("."):
51
+ ext = "." + ext
52
+ if ext:
53
+ result.append(ext)
54
+ return result
55
+
56
+
57
+ def _parse_paths(raw: str) -> list[Path]:
58
+ """Parse a semicolon-separated path string into a list of Path objects.
59
+
60
+ Example: ``"/src/a; /src/b"`` → ``[Path('/src/a'), Path('/src/b')]``
61
+
62
+ Author: Ron Webb
63
+ Since: 1.0.0
64
+ """
65
+ result: list[Path] = []
66
+ for part in raw.split(";"):
67
+ stripped = part.strip()
68
+ if stripped:
69
+ result.append(Path(stripped))
70
+ return result
71
+
72
+
73
def _scan_and_store(task: tuple[Path, SessionStore, str]) -> None:
    """Scan a single file and persist what was found.

    *task* is a ``(file_path, store, session_id)`` tuple; bundling the
    arguments lets an executor call this function with one positional
    argument and no closure.

    Author: Ron Webb
    Since: 1.0.0
    """
    target, session_store, run_id = task
    found = scan_file(target)
    session_store.insert_occurrences(run_id, found)
84
+
85
+
86
def discover_files(path: Path, extensions: list[str]) -> list[Path]:
    """Return the files reachable from *path* that pass the extension filter.

    A regular file yields itself; a directory yields every regular file
    found recursively beneath it, in sorted order; anything else yields an
    empty list. When *extensions* is non-empty, only files whose lowercased
    suffix appears in it are kept (this applies to a single file as well).

    Author: Ron Webb
    Since: 1.0.0
    """
    if path.is_file():
        found = [path]
    elif path.is_dir():
        found = [entry for entry in path.rglob("*") if entry.is_file()]
        found.sort()
    else:
        # Neither a file nor a directory (e.g. the path does not exist).
        return []

    if extensions:
        found = [entry for entry in found if entry.suffix.lower() in extensions]
    return found
107
+
108
+
109
def _run_concurrent_scan(
    files: list[Path],
    store: SessionStore,
    session_id: str,
    workers: int,
) -> None:
    """Scan *files* on a thread pool, storing results under *session_id*.

    A transient progress bar is rendered on the module-level rich console
    and advanced once per finished file. A file whose scan raises is logged
    as a warning and does not stop the remaining files.

    Author: Ron Webb
    Since: 1.0.0
    """
    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
    )
    with Progress(*columns, console=_console, transient=True) as progress:
        bar = progress.add_task("[cyan]Scanning\u2026", total=len(files))
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
            # Remember which file each future belongs to for error reporting.
            pending = {}
            for source_file in files:
                job = pool.submit(_scan_and_store, (source_file, store, session_id))
                pending[job] = source_file

            for finished in concurrent.futures.as_completed(pending):
                try:
                    finished.result()
                except Exception as exc:  # pylint: disable=broad-exception-caught
                    _logger.warning("Failed to scan %s: %s", pending[finished], exc)
                progress.advance(bar)
143
+
144
+
145
@click.command()
@click.argument("path")
@click.option(
    "--ext",
    default="",
    help=(
        "Comma-separated file extensions to include "
        "(e.g. py,java,js,ts). Omit to scan all files."
    ),
)
@click.option(
    "--output",
    default="litscan-output",
    help=(
        "Base name (without extension) for the output file(s) "
        "(default: litscan-output)."
    ),
)
@click.option(
    "--output-dir",
    "output_dir",
    default="reports",
    type=click.Path(path_type=Path),
    help=(
        "Directory where the output file will be written "
        "(default: reports). "
        "The filename from --output is placed inside this directory."
    ),
)
@click.option(
    "--format",
    "fmt",
    default="json",
    type=click.Choice(_VALID_FORMATS),
    help="Output format: json, html, or all (default: json).",
)
@click.option(
    "--workers",
    # ThreadPoolExecutor rejects max_workers <= 0 with a ValueError; validate
    # at the CLI boundary so the user gets a usage error instead of a
    # traceback after file discovery has already run.
    type=click.IntRange(min=1),
    default=min(32, (os.cpu_count() or 1) + 4),
    help=(
        "Number of parallel worker threads used to scan files "
        "(default: min(32, cpu_count + 4))."
    ),
)
@click.option(
    "--db",
    "db_path",
    default=str(Path(tempfile.gettempdir()) / "litscan.db"),
    type=click.Path(path_type=Path),
    help=(
        "Path to the SQLite scratch database used to store occurrences "
        "during a scan run (default: <system-temp>/litscan.db). "
        "Session records are removed after the report is written."
    ),
)
def main(  # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals
    path: str,
    ext: str,
    output: str,
    output_dir: Path,
    fmt: str,
    workers: int,
    db_path: Path,
) -> None:
    """Scan source files for string and numeric literals.

    Author: Ron Webb
    Since: 1.0.0
    """
    # NOTE: click renders the docstring above as the --help text, so
    # maintainer notes are kept in comments rather than in the docstring.
    #
    # Parameters (all supplied by click):
    #   path       - one or more files/directories, separated by ';'.
    #   ext        - comma-separated extension filter; empty means all files.
    #   output     - base name of the report; only its stem is used.
    #   output_dir - directory that receives the report file(s).
    #   fmt        - "json", "html" or "all".
    #   workers    - size of the scanning thread pool (>= 1).
    #   db_path    - SQLite scratch database for this run's occurrences.
    _header = f"{_APP_NAME} v{__version__}"
    _logger.info(_header)
    _console.print(f"[bold]{_header}[/bold]")
    extensions = _parse_extensions(ext) if ext else []
    paths = _parse_paths(path)

    # Collect files from every target, keeping first-seen order and dropping
    # files reachable through more than one target.
    seen: set[Path] = set()
    files: list[Path] = []
    with _console.status("[bold cyan]Discovering files\u2026", spinner="dots"):
        for target_path in paths:
            for found_file in discover_files(target_path, extensions):
                if found_file not in seen:
                    seen.add(found_file)
                    files.append(found_file)

    if not files:
        _logger.info("No files found in %s", path)
        _console.print("[yellow]No files found.[/yellow]")
        return

    _console.print(f"[bold]Scanning[/bold] {len(files)} file(s)\u2026")

    # Each run gets its own session id so its rows can be told apart from
    # (and removed independently of) any other rows in the database file.
    session_id = str(uuid.uuid4())
    store = SessionStore(db_path)
    try:
        _run_concurrent_scan(files, store, session_id, workers)
        groups = store.read_groups(session_id)
        stem = Path(output).stem
        written = write_outputs(groups, output_dir, stem, fmt)
    finally:
        # Always remove this run's rows and release the connection, even
        # when scanning or report writing failed.
        store.delete_session(session_id)
        store.close()

    total = sum(g["count"] for g in groups)
    _logger.info(
        "Found %s literals (%s unique) -> %s",
        total,
        len(groups),
        ", ".join(str(p) for p in written),
    )
    _console.print(
        f"[bold green]\u2713[/bold green] "
        f"[bold]{total}[/bold] literals "
        f"([bold]{len(groups)}[/bold] unique) "
        f"\u2192 {', '.join(str(p) for p in written)}"
    )
261
+
262
+
263
+ if __name__ == "__main__":
264
+ main() # pylint: disable=no-value-for-parameter
@@ -0,0 +1,28 @@
1
+ [loggers]
2
+ keys=root
3
+
4
+ [handlers]
5
+ keys=consoleHandler,fileHandler
6
+
7
+ [formatters]
8
+ keys=logFormatter,consoleFormatter
9
+
10
+ [logger_root]
11
+ level=INFO
12
+ handlers=fileHandler
13
+
14
+ [handler_consoleHandler]
15
+ class=StreamHandler
16
+ formatter=consoleFormatter
17
+ args=(sys.stderr,)
18
+
19
+ [handler_fileHandler]
20
+ class=FileHandler
21
+ formatter=logFormatter
22
+ args=('litscan.log', 'a')
23
+
24
+ [formatter_logFormatter]
25
+ format=%(asctime)s [%(levelname)s] %(name)s - %(message)s
26
+
27
+ [formatter_consoleFormatter]
28
+ format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
@@ -0,0 +1,356 @@
1
+ """Report generation for litscan: JSON and HTML output writers.
2
+
3
+ Author: Ron Webb
4
+ Since: 1.0.0
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import html as _html
10
+ import json
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+
14
+ from . import __version__
15
+ from .scanner import LiteralGroup, ScanReport
16
+
17
+ _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
18
+
19
+ _CSS = """
20
+ * { box-sizing: border-box; margin: 0; padding: 0; }
21
+ body {
22
+ font-family: 'Segoe UI', Arial, sans-serif;
23
+ background: #f5f7fa;
24
+ color: #2c3e50;
25
+ padding: 2em;
26
+ }
27
+ header {
28
+ background: #1a3a5c;
29
+ color: #ffffff;
30
+ border-radius: 8px;
31
+ padding: 1.2em 1.8em;
32
+ margin-bottom: 1.5em;
33
+ }
34
+ header h1 { font-size: 1.6em; font-weight: 700; letter-spacing: 0.02em; }
35
+ header p { font-size: 0.9em; margin-top: 0.3em; opacity: 0.85; }
36
+ .summary {
37
+ font-size: 0.9em;
38
+ color: #555;
39
+ margin-bottom: 1em;
40
+ }
41
+ .table-wrap { overflow-x: auto; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,.12); }
42
+ table {
43
+ border-collapse: collapse;
44
+ width: 100%;
45
+ background: #ffffff;
46
+ font-size: 0.88em;
47
+ }
48
+ thead tr { background: #1a3a5c; color: #ffffff; }
49
+ th {
50
+ padding: 0.75em 1em;
51
+ text-align: left;
52
+ font-weight: 600;
53
+ letter-spacing: 0.03em;
54
+ white-space: nowrap;
55
+ }
56
+ th.sortable { cursor: pointer; user-select: none; }
57
+ th.sortable:hover { background: #254e80; }
58
+ th.sortable .sort-icon { margin-left: 0.4em; font-size: 0.8em; }
59
+ th.sortable.asc .sort-icon::after { content: '\\25b2'; }
60
+ th.sortable.desc .sort-icon::after { content: '\\25bc'; }
61
+ th.sortable:not(.asc):not(.desc) .sort-icon::after { content: '\\21c5'; opacity: 0.6; }
62
+ .filter-row th { background: #1a3a5c; padding: 0.3em 1em 0.5em; }
63
+ .filter-row input {
64
+ width: 100%;
65
+ padding: 0.3em 0.5em;
66
+ border: 1px solid #3d6b9e;
67
+ border-radius: 4px;
68
+ background: #1e4575;
69
+ color: #fff;
70
+ font-size: 0.85em;
71
+ outline: none;
72
+ }
73
+ .filter-row input::placeholder { color: #aac4e8; }
74
+ .filter-row input:focus { border-color: #7eb3e8; background: #255085; }
75
+ td {
76
+ padding: 0.6em 1em;
77
+ border-bottom: 1px solid #e8ecf0;
78
+ vertical-align: top;
79
+ }
80
+ tbody tr.alt-row { background: #f0f4f8; }
81
+ tbody tr:hover { background: #dde9f7; }
82
+ td.row-num { text-align: right; color: #7f8c8d; font-variant-numeric: tabular-nums; width: 3em; }
83
+ td.count { text-align: center; font-weight: 600; color: #1a3a5c; width: 5em; }
84
+ td.literal code {
85
+ background: #eef2f7;
86
+ border-radius: 3px;
87
+ padding: 0.1em 0.4em;
88
+ font-family: 'Consolas', 'Courier New', monospace;
89
+ font-size: 0.95em;
90
+ word-break: break-all;
91
+ }
92
+ td.literal code .truncated {
93
+ color: #999;
94
+ font-style: italic;
95
+ cursor: help;
96
+ user-select: none;
97
+ }
98
+ td.locations { font-family: 'Consolas', 'Courier New', monospace; font-size: 0.82em; color: #555; word-break: break-all; }
99
+ footer { margin-top: 1.5em; font-size: 0.8em; color: #aaa; text-align: center; }
100
+ """
101
+
102
+
103
def _write_json(groups: list[LiteralGroup], path: Path, run_date: str) -> None:
    """Serialise *groups* into a JSON report and write it to *path*.

    The report carries the application name, the package version, the
    supplied *run_date* string and the groups themselves under
    ``"findings"``. Output is indented by two spaces and encoded as UTF-8.

    Author: Ron Webb
    Since: 1.0.0
    """
    report: ScanReport = {
        "application": "litscan",
        "version": __version__,
        "run-date": run_date,
        "findings": groups,
    }
    serialised = json.dumps(report, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(serialised)
116
+
117
+
118
+ _TRUNCATED_MARKER = '<span class="truncated" title="Multiline literal \u2014 only first line shown">\u2026</span>'
119
+
120
+
121
+ def _literal_display(literal: str) -> str:
122
+ """Return HTML for the first line of *literal* with a truncation marker when multiline.
123
+
124
+ Author: Ron Webb
125
+ Since: 1.0.0
126
+ """
127
+ first_line, _, rest = literal.partition("\n")
128
+ display = _html.escape(first_line, quote=False)
129
+ if rest:
130
+ display += _TRUNCATED_MARKER
131
+ return display
132
+
133
+
134
+ def _build_thead_html() -> str:
135
+ """Return the HTML ``<thead>`` element with sortable column headers and an inline filter row.
136
+
137
+ Author: Ron Webb
138
+ Since: 1.0.0
139
+ """
140
+ return (
141
+ " <thead>\n"
142
+ " <tr>"
143
+ "<th>#</th>"
144
+ '<th class="sortable" onclick="litscanSortBy(0)">Literal<span class="sort-icon"></span></th>'
145
+ '<th class="sortable" onclick="litscanSortBy(1)">Count<span class="sort-icon"></span></th>'
146
+ "<th>Locations</th>"
147
+ "</tr>\n"
148
+ ' <tr class="filter-row">'
149
+ "<th></th>"
150
+ '<th><input type="text" id="filter-literal" placeholder="Filter literal\u2026"'
151
+ ' oninput="litscanFilter()"></th>'
152
+ '<th><input type="text" id="filter-count" placeholder="e.g. &gt;5"'
153
+ ' oninput="litscanFilter()"></th>'
154
+ "<th></th>"
155
+ "</tr>\n"
156
+ " </thead>\n"
157
+ )
158
+
159
+
160
def _build_html_scaffold(
    run_date: str, total: int, unique: int, rows_html: str, script: str
) -> str:
    """Assemble the full HTML document around pre-rendered rows and script.

    The document embeds the module stylesheet, a header with the package
    version and *run_date*, a summary line built from *total* and *unique*,
    the table (column headers come from :func:`_build_thead_html`, body from
    *rows_html*), a footer, and finally *script*.

    Author: Ron Webb
    Since: 1.0.0
    """
    title = f"LitScan {__version__} Report"
    head_section = (
        "<head>\n"
        ' <meta charset="UTF-8">\n'
        ' <meta name="viewport" content="width=device-width, initial-scale=1">\n'
        f" <title>{title}</title>\n"
        " <style>\n"
        f"{_CSS}"
        " </style>\n"
        "</head>\n"
    )
    table_section = (
        ' <div class="table-wrap">\n'
        " <table>\n"
        f"{_build_thead_html()}"
        " <tbody>\n"
        f"{rows_html}\n"
        " </tbody>\n"
        " </table>\n"
        " </div>\n"
    )
    body_section = (
        "<body>\n"
        " <header>\n"
        f" <h1>{title}</h1>\n"
        f" <p>Date Run: {run_date}</p>\n"
        " </header>\n"
        f' <p class="summary">Found {total} literals &mdash; {unique} unique</p>\n'
        f"{table_section}"
        f" <footer>Generated by LitScan {__version__}</footer>\n"
        f"{script}"
        "</body>\n"
    )
    return "".join(
        (
            "<!DOCTYPE html>\n",
            '<html lang="en">\n',
            head_section,
            body_section,
            "</html>\n",
        )
    )
201
+
202
+
203
def _build_html(groups: list[LiteralGroup], run_date: str) -> str:
    """Render *groups* as a complete, self-contained HTML report.

    One table row is produced per group. Each row carries the full literal
    and its count in ``data-*`` attributes, which the embedded script reads
    for client-side sorting and filtering; the visible literal cell shows
    only the first line of the literal.

    Author: Ron Webb
    Since: 1.0.0
    """

    def _render_row(position: int, group: LiteralGroup) -> str:
        """Return the ``<tr>`` markup for one group at 1-based *position*."""
        shown = _literal_display(group["literal"])
        # The attribute value needs quotes escaped; the cell text does not.
        attr_value = _html.escape(group["literal"], quote=True)
        occurrences = group["count"]
        where = "<br>".join(_html.escape(loc, quote=False) for loc in group["files"])
        stripe = "" if position % 2 else " alt-row"
        return (
            f' <tr class="data-row{stripe}"'
            f' data-idx="{position}"'
            f' data-literal="{attr_value}"'
            f' data-count="{occurrences}">'
            f'<td class="row-num">{position}</td>'
            f'<td class="literal"><code>{shown}</code></td>'
            f'<td class="count">{occurrences}</td>'
            f'<td class="locations">{where}</td>'
            f"</tr>"
        )

    grand_total = sum(entry["count"] for entry in groups)
    distinct = len(groups)
    rows_html = "\n".join(
        _render_row(position, entry) for position, entry in enumerate(groups, start=1)
    )

    # Client-side behaviour: column sorting, literal/count filtering,
    # renumbering and re-striping of the visible rows.
    script = (
        "<script>\n"
        "(function () {\n"
        " var sortCol = -1, sortDir = 1;\n"
        "\n"
        " function sortBy(col) {\n"
        " if (sortCol === col) { sortDir = -sortDir; }\n"
        " else { sortCol = col; sortDir = 1; }\n"
        " applySort();\n"
        " }\n"
        "\n"
        " function applySort() {\n"
        " var tbody = document.querySelector('tbody');\n"
        " var rows = Array.prototype.slice.call(tbody.querySelectorAll('tr'));\n"
        " rows.sort(function (a, b) {\n"
        " var va, vb;\n"
        " if (sortCol === 0) {\n"
        " va = a.dataset.literal.toLowerCase();\n"
        " vb = b.dataset.literal.toLowerCase();\n"
        " } else if (sortCol === 1) {\n"
        " va = parseInt(a.dataset.count, 10);\n"
        " vb = parseInt(b.dataset.count, 10);\n"
        " } else { return 0; }\n"
        " if (va < vb) return -sortDir;\n"
        " if (va > vb) return sortDir;\n"
        " return 0;\n"
        " });\n"
        " rows.forEach(function (r) { tbody.appendChild(r); });\n"
        " updateNumbers();\n"
        " restripe();\n"
        " updateSortIcons();\n"
        " }\n"
        "\n"
        " function updateSortIcons() {\n"
        " var ths = document.querySelectorAll('thead tr:first-child th.sortable');\n"
        " ths.forEach(function (th, i) {\n"
        " th.classList.remove('asc', 'desc');\n"
        " if (i === sortCol) { th.classList.add(sortDir === 1 ? 'asc' : 'desc'); }\n"
        " });\n"
        " }\n"
        "\n"
        " function matchCount(count, filter) {\n"
        " var m = filter.match(/^(>=|<=|>|<|=)?(\\d+)$/);\n"
        " if (!m) return true;\n"
        " var op = m[1] || '=', val = parseInt(m[2], 10);\n"
        " if (op === '>') return count > val;\n"
        " if (op === '<') return count < val;\n"
        " if (op === '>=') return count >= val;\n"
        " if (op === '<=') return count <= val;\n"
        " return count === val;\n"
        " }\n"
        "\n"
        " function applyFilter() {\n"
        " var litFilter = document.getElementById('filter-literal').value.toLowerCase();\n"
        " var cntFilter = document.getElementById('filter-count').value.trim();\n"
        " var tbody = document.querySelector('tbody');\n"
        " tbody.querySelectorAll('tr').forEach(function (row) {\n"
        " var litMatch = !litFilter || row.dataset.literal.toLowerCase().indexOf(litFilter) !== -1;\n"
        " var cntMatch = !cntFilter || matchCount(parseInt(row.dataset.count, 10), cntFilter);\n"
        " row.style.display = (litMatch && cntMatch) ? '' : 'none';\n"
        " });\n"
        " updateNumbers();\n"
        " restripe();\n"
        " }\n"
        "\n"
        " function updateNumbers() {\n"
        " var num = 1;\n"
        " document.querySelectorAll('tbody tr').forEach(function (row) {\n"
        " if (row.style.display !== 'none') {\n"
        " row.querySelector('.row-num').textContent = num++;\n"
        " }\n"
        " });\n"
        " }\n"
        "\n"
        " function restripe() {\n"
        " var num = 0;\n"
        " document.querySelectorAll('tbody tr').forEach(function (row) {\n"
        " if (row.style.display !== 'none') {\n"
        " num++;\n"
        " row.classList.toggle('alt-row', num % 2 === 0);\n"
        " }\n"
        " });\n"
        " }\n"
        "\n"
        " window.litscanSortBy = sortBy;\n"
        " window.litscanFilter = applyFilter;\n"
        "})();\n"
        "</script>\n"
    )
    return _build_html_scaffold(run_date, grand_total, distinct, rows_html, script)
321
+
322
+
323
def _write_html(groups: list[LiteralGroup], path: Path, run_date: str) -> None:
    """Render *groups* as an HTML report and write it to *path* as UTF-8.

    Author: Ron Webb
    Since: 1.0.0
    """
    document = _build_html(groups, run_date)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(document)
330
+
331
+
332
def write_outputs(
    groups: list[LiteralGroup],
    output_dir: Path,
    stem: str,
    fmt: str,
) -> list[Path]:
    """Write the report file(s) selected by *fmt* into *output_dir*.

    *output_dir* is created when missing. ``"json"`` and ``"html"`` each
    produce ``<stem>.<format>``; ``"all"`` produces both, JSON first. Both
    files share one run timestamp taken at the start of the call.

    Returns the paths written, in the order they were written.

    Author: Ron Webb
    Since: 1.0.0
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    run_date = datetime.now().strftime(_DATE_FORMAT)

    # (format name / file suffix, writer) pairs in emission order.
    writers = (("json", _write_json), ("html", _write_html))
    written: list[Path] = []
    for kind, writer in writers:
        if fmt not in (kind, "all"):
            continue
        target = output_dir / f"{stem}.{kind}"
        writer(groups, target, run_date)
        written.append(target)
    return written
@@ -0,0 +1,141 @@
1
+ """Literal scanner utilities.
2
+
3
+ Author: Ron Webb
4
+ Since: 1.0.0
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import bisect
10
+ import re
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import TypedDict
14
+
15
+ # Ordered alternation: triple-quoted blocks first (multiline), then single-line
16
+ # strings, then decimal numbers, then integers.
17
+ _PATTERN = re.compile(
18
+ r'"""[\s\S]*?"""'
19
+ r"|'''[\s\S]*?'''"
20
+ r'|"(?:[^"\\\n]|\\.)*"'
21
+ r"|'(?:[^'\\\n]|\\.)*'"
22
+ r"|\b\d+\.\d+\b"
23
+ r"|\b\d+\b",
24
+ )
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class LiteralOccurrence:
29
+ """Represents a discovered literal value in source code.
30
+
31
+ Author: Ron Webb
32
+ Since: 1.0.0
33
+ """
34
+
35
+ file_path: Path
36
+ line: int
37
+ column: int
38
+ value: str
39
+
40
+
41
+ def _build_line_offsets(source: str) -> list[int]:
42
+ """Return a list of character offsets where each line starts (0-indexed).
43
+
44
+ The result always begins with ``0`` (start of line 1). Each subsequent
45
+ entry is the offset of the first character on the following line.
46
+ Precomputing this once gives O(log n) line/column lookup per match via
47
+ :func:`_line_and_column`, instead of the naive O(n) slice-and-scan.
48
+
49
+ Author: Ron Webb
50
+ Since: 1.0.0
51
+ """
52
+ offsets: list[int] = [0]
53
+ start = 0
54
+ while True:
55
+ pos = source.find("\n", start)
56
+ if pos == -1:
57
+ break
58
+ offsets.append(pos + 1)
59
+ start = pos + 1
60
+ return offsets
61
+
62
+
63
+ def _line_and_column(line_offsets: list[int], offset: int) -> tuple[int, int]:
64
+ """Return 1-based line and 0-based column for a character offset in source.
65
+
66
+ *line_offsets* must be the list returned by :func:`_build_line_offsets`.
67
+ Uses :func:`bisect.bisect_right` for O(log n) lookup.
68
+
69
+ Author: Ron Webb
70
+ Since: 1.0.0
71
+ """
72
+ line = bisect.bisect_right(line_offsets, offset)
73
+ col = offset - line_offsets[line - 1]
74
+ return line, col
75
+
76
+
77
def scan_literals(source: str, file_path: Path) -> list[LiteralOccurrence]:
    """Find every string and numeric literal in *source*.

    Matching is purely textual, so any language or plain text works.
    Recognised forms:
    - blocks delimited by triple double or triple single quotes (these may
      span several lines);
    - single-line strings in double or single quotes;
    - decimal numbers and integers.

    Occurrences are returned in match order; each records *file_path*, the
    1-based line and 0-based column where the match starts, and the matched
    text.

    Author: Ron Webb
    Since: 1.0.0
    """
    starts = _build_line_offsets(source)

    def _to_occurrence(hit: re.Match[str]) -> LiteralOccurrence:
        """Convert one regex match into a positioned occurrence."""
        row, col = _line_and_column(starts, hit.start())
        return LiteralOccurrence(
            file_path=file_path,
            line=row,
            column=col,
            value=hit.group(),
        )

    return [_to_occurrence(hit) for hit in _PATTERN.finditer(source)]
102
+
103
+
104
def scan_file(file_path: Path) -> list[LiteralOccurrence]:
    """Read *file_path* and return the literal occurrences it contains.

    The file is decoded as UTF-8 with undecodable bytes replaced rather than
    raising. Reading and scanning are combined in one callable so it can be
    handed straight to a :class:`concurrent.futures.Executor`.

    Author: Ron Webb
    Since: 1.0.0
    """
    with file_path.open(encoding="utf-8", errors="replace") as handle:
        text = handle.read()
    return scan_literals(text, file_path)
116
+
117
+
118
class LiteralGroup(TypedDict):
    """One distinct literal together with where and how often it occurs.

    Plain-dict shaped so it can be passed to ``json.dumps`` unchanged.

    Author: Ron Webb
    Since: 1.0.0
    """

    # Number of occurrences of the literal.
    count: int
    # The literal text.
    literal: str
    # One location string per occurrence.
    files: list[str]


# The report key "run-date" is not a valid Python identifier, so ScanReport
# has to be declared with the functional TypedDict syntax. That form cannot
# carry a docstring; the keys are described inline instead.
ScanReport = TypedDict(
    "ScanReport",
    {
        # Name of the tool that produced the report.
        "application": str,
        # Version of the tool.
        "version": str,
        # Timestamp of the run, already formatted as text.
        "run-date": str,
        # The grouped literals.
        "findings": list[LiteralGroup],
    },
)
@@ -0,0 +1,150 @@
1
+ """Session-scoped SQLite store for literal occurrences.
2
+
3
+ Occurrences produced during parallel file scanning are written directly to an
4
+ SQLite database instead of accumulated in memory. Every scan run is assigned
5
+ a UUID so multiple concurrent invocations share the same database file without
6
+ interference. After the report is written the caller deletes the session
7
+ records, keeping the database lean.
8
+
9
+ Author: Ron Webb
10
+ Since: 1.0.0
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import sqlite3
16
+ import threading
17
+ from pathlib import Path
18
+
19
+ from .scanner import LiteralGroup, LiteralOccurrence
20
+
21
_CREATE_TABLE = (
    "CREATE TABLE IF NOT EXISTS occurrences ("
    " session_id TEXT NOT NULL,"
    " file_path TEXT NOT NULL,"
    " line INTEGER NOT NULL,"
    " col INTEGER NOT NULL,"
    " value TEXT NOT NULL"
    ")"
)
_CREATE_INDEX = "CREATE INDEX IF NOT EXISTS idx_session ON occurrences (session_id)"
_INSERT = (
    "INSERT INTO occurrences (session_id, file_path, line, col, value)"
    " VALUES (?, ?, ?, ?, ?)"
)

# Separator used inside GROUP_CONCAT; must not appear in file paths or loc
# strings. Defined before the query so the SQL and the Python-side split
# share a single definition and cannot drift apart.
_LOC_SEP = "|||"

_SELECT_GROUPS = (
    "SELECT value, COUNT(*) AS cnt,"
    f" GROUP_CONCAT(file_path || ':' || line || ':' || col, '{_LOC_SEP}')"
    " FROM occurrences WHERE session_id = ?"
    " GROUP BY value"
    " ORDER BY cnt DESC, value ASC"
)
_DELETE_SESSION = "DELETE FROM occurrences WHERE session_id = ?"


class SessionStore:
    """SQLite-backed session store for literal occurrences.

    One connection is shared by all threads of a run and guarded by a lock.
    Rows are tagged with a *session_id* so that several runs can use the
    same database file without seeing each other's records. Grouping and
    counting are done in SQL, so Python only ever holds the grouped result.

    Author: Ron Webb
    Since: 1.0.0
    """

    def __init__(self, db_path: Path) -> None:
        """Open (or create) the SQLite database at *db_path*.

        Enables WAL journalling with ``synchronous=NORMAL`` and creates the
        ``occurrences`` table and its session index when missing.

        Raises:
            sqlite3.Error: when the database cannot be opened or prepared
                (for example, *db_path* is not a SQLite file). The
                connection is closed before the error propagates.

        Author: Ron Webb
        Since: 1.0.0
        """
        # check_same_thread=False: worker threads share this connection;
        # every use is serialised through self._lock.
        self._conn = sqlite3.connect(db_path, check_same_thread=False)
        self._lock = threading.Lock()
        try:
            with self._lock:
                self._conn.execute("PRAGMA journal_mode=WAL")
                self._conn.execute("PRAGMA synchronous=NORMAL")
                self._conn.execute(_CREATE_TABLE)
                self._conn.execute(_CREATE_INDEX)
                self._conn.commit()
        except sqlite3.Error:
            # The caller never receives the instance when __init__ raises,
            # so nobody else could close the connection: do it here.
            self._conn.close()
            raise

    def insert_occurrences(
        self, session_id: str, occurrences: list[LiteralOccurrence]
    ) -> None:
        """Persist *occurrences* for *session_id* in one committed batch.

        An empty list is a no-op.

        Author: Ron Webb
        Since: 1.0.0
        """
        if not occurrences:
            # Nothing to write: skip taking the lock and an empty commit.
            return
        rows = [
            (session_id, str(o.file_path), o.line, o.column, o.value)
            for o in occurrences
        ]
        with self._lock:
            self._conn.executemany(_INSERT, rows)
            self._conn.commit()

    def read_groups(self, session_id: str) -> list[LiteralGroup]:
        """Return the literals of *session_id* grouped by value.

        Groups are ordered by descending count, then by value. Each group's
        ``files`` entry lists one ``file_path:line:col`` string per
        occurrence. Aggregation happens inside SQLite, so only one row per
        distinct literal is fetched into Python.

        Author: Ron Webb
        Since: 1.0.0
        """
        with self._lock:
            rows = self._conn.execute(_SELECT_GROUPS, (session_id,)).fetchall()
        # A TypedDict is a plain dict at runtime, so dict literals produce
        # exactly what the LiteralGroup constructor would.
        return [
            {
                "count": count,
                "literal": value,
                "files": locations.split(_LOC_SEP) if locations else [],
            }
            for value, count, locations in rows
        ]

    def delete_session(self, session_id: str) -> None:
        """Remove every occurrence stored under *session_id*.

        Author: Ron Webb
        Since: 1.0.0
        """
        with self._lock:
            self._conn.execute(_DELETE_SESSION, (session_id,))
            self._conn.commit()

    def close(self) -> None:
        """Close the underlying database connection.

        Author: Ron Webb
        Since: 1.0.0
        """
        with self._lock:
            self._conn.close()

    def __enter__(self) -> SessionStore:
        """Return *self* so the store can be used in a ``with`` statement.

        Author: Ron Webb
        Since: 1.0.0
        """
        return self

    def __exit__(self, *_: object) -> None:
        """Close the connection when the ``with`` block ends.

        Author: Ron Webb
        Since: 1.0.0
        """
        self.close()
@@ -0,0 +1,93 @@
1
+ """
2
+ Utility helpers for litscan.
3
+
4
+ Provides :func:`setup_logger` for consistent logging configuration.
5
+
6
+ Author: Ron Webb
7
+ Since: 1.0.0
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import importlib.resources
13
+ import logging
14
+ import logging.config
15
+ import os
16
+ import shutil
17
+ from pathlib import Path
18
+
19
+
20
+ def _load_config(config_path: str) -> None:
21
+ """
22
+ Load ``logging.ini`` from *config_path* via :func:`logging.config.fileConfig`.
23
+
24
+ Falls back to :func:`logging.basicConfig` and emits a warning when the
25
+ file cannot be parsed.
26
+
27
+ Author: Ron Webb
28
+ Since: 1.0.0
29
+ """
30
+ try:
31
+ logging.config.fileConfig(config_path, disable_existing_loggers=False)
32
+ except Exception: # pylint: disable=broad-exception-caught
33
+ logging.basicConfig(level=logging.INFO)
34
+ logging.exception(
35
+ "Failed to load logging config from %s. Using basic configuration.",
36
+ config_path,
37
+ )
38
+
39
+
40
def _load_packaged_config() -> None:
    """
    Configure logging from the ``logging.ini`` shipped in the ``litscan``
    package.

    The resource is located through :mod:`importlib.resources` and exposed
    as a real filesystem path for the duration of the load.

    Author: Ron Webb
    Since: 1.0.0
    """
    bundled = importlib.resources.files("litscan") / "logging.ini"
    with importlib.resources.as_file(bundled) as ini_file:
        _load_config(str(ini_file))
51
+
52
+
53
+ def _ensure_config_dir(config_dir: Path) -> Path:
54
+ """
55
+ Create *config_dir* if it does not exist and copy the packaged
56
+ ``logging.ini`` into it when the file is absent.
57
+
58
+ Returns the path to ``logging.ini`` inside *config_dir*.
59
+
60
+ Author: Ron Webb
61
+ Since: 1.0.0
62
+ """
63
+ config_dir.mkdir(parents=True, exist_ok=True)
64
+ target = config_dir / "logging.ini"
65
+ if not target.exists():
66
+ pkg_ref = importlib.resources.files("litscan").joinpath("logging.ini")
67
+ with importlib.resources.as_file(pkg_ref) as src_path:
68
+ shutil.copy2(src_path, target)
69
+ return target
70
+
71
+
72
def setup_logger(name: str) -> logging.Logger:
    """
    Configure logging and return the logger called *name*.

    Where ``logging.ini`` comes from:

    1. If the ``LITSCAN_CONFIG_DIR`` environment variable is set and
       non-empty, that directory is created when needed, seeded with the
       packaged ``logging.ini`` if it has none, and the file found there is
       loaded.
    2. Otherwise the ``logging.ini`` bundled inside the ``litscan`` package
       is loaded directly via :mod:`importlib.resources`.

    Author: Ron Webb
    Since: 1.0.0
    """
    override_dir = os.environ.get("LITSCAN_CONFIG_DIR")
    if not override_dir:
        _load_packaged_config()
        return logging.getLogger(name)

    ini_file = _ensure_config_dir(Path(override_dir))
    _load_config(str(ini_file))
    return logging.getLogger(name)
@@ -0,0 +1,32 @@
1
+ [project]
2
+ name = "litscan"
3
+ version = "1.0.0"
4
+ description = "A small CLI tool that scans a codebase for string and numeric literals, helping you quickly spot hard-coded values in source files."
5
+ authors = [
6
+ {name = "Ron Webb",email = "ron@ronella.xyz"}
7
+ ]
8
+ requires-python = ">=3.14"
9
+ dependencies = [
10
+ "rich (>=15.0.0,<16.0.0)",
11
+ "click (>=8.0.0,<9.0.0)"
12
+ ]
13
+
14
+ [project.scripts]
15
+ litscan = "litscan.cli:main"
16
+
17
+
18
+ [tool.poetry]
19
+ packages = [{include = "litscan"}]
20
+ include = ["litscan/logging.ini"]
21
+
22
+ [build-system]
23
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
24
+ build-backend = "poetry.core.masonry.api"
25
+
26
+ [dependency-groups]
27
+ dev = [
28
+ "black (>=26.5.1,<27.0.0)",
29
+ "pylint (>=4.0.5,<5.0.0)",
30
+ "pytest (>=9.0.3,<10.0.0)",
31
+ "pytest-cov (>=7.1.0,<8.0.0)"
32
+ ]