PyPI - litscan - Versions diffs - 1.0.0__tar.gz - Mend

litscan 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

litscan-1.0.0/PKG-INFO +11 -0
litscan-1.0.0/litscan/__init__.py +7 -0
litscan-1.0.0/litscan/cli.py +264 -0
litscan-1.0.0/litscan/logging.ini +28 -0
litscan-1.0.0/litscan/reporter.py +356 -0
litscan-1.0.0/litscan/scanner.py +141 -0
litscan-1.0.0/litscan/store.py +150 -0
litscan-1.0.0/litscan/util.py +93 -0
litscan-1.0.0/pyproject.toml +32 -0

litscan-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,11 @@
+Metadata-Version: 2.4
+Name: litscan
+Version: 1.0.0
+Summary: A small CLI tool that scans a codebase for string and numeric literals, helping you quickly spot hard-coded values in source files.
+Author: Ron Webb
+Author-email: ron@ronella.xyz
+Requires-Python: >=3.14
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.14
+Requires-Dist: click (>=8.0.0,<9.0.0)
+Requires-Dist: rich (>=15.0.0,<16.0.0)

litscan-1.0.0/litscan/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""litscan package.
+Author: Ron Webb
+Since: 1.0.0
+"""
+__version__ = "1.0.0"

litscan-1.0.0/litscan/cli.py ADDED Viewed

@@ -0,0 +1,264 @@
+"""Command-line interface for litscan.
+Author: Ron Webb
+Since: 1.0.0
+"""
+from __future__ import annotations
+import concurrent.futures
+import os
+import tempfile
+import uuid
+from pathlib import Path
+import click
+from rich.console import Console
+from rich.progress import (
+    BarColumn,
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+    TimeElapsedColumn,
+)
+from . import __version__
+from .reporter import write_outputs
+from .scanner import scan_file
+from .store import SessionStore
+from .util import setup_logger
+# Values accepted by the --format option ("all" writes both JSON and HTML).
+_VALID_FORMATS = ("json", "html", "all")
+# Application name shown in the start-up banner.
+_APP_NAME = "litscan"
+# Rich console bound to stderr; used for status, progress and summary output.
+_console = Console(stderr=True)
+# Module logger obtained through the package's setup_logger helper.
+_logger = setup_logger(__name__)
+def _parse_extensions(raw: str) -> list[str]:
+    """Turn a comma-separated extension string into normalised suffixes.
+    Every entry is stripped of surrounding whitespace, lowercased and given
+    a leading dot when it has none; empty entries are dropped.
+    Example: ``"py,js, TS"`` → ``['.py', '.js', '.ts']``
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    cleaned = (piece.strip().lower() for piece in raw.split(","))
+    return [
+        token if token.startswith(".") else f".{token}"
+        for token in cleaned
+        if token
+    ]
+def _parse_paths(raw: str) -> list[Path]:
+    """Split a semicolon-separated string into ``Path`` objects.
+    Whitespace around each entry is removed and empty entries are skipped.
+    Example: ``"/src/a; /src/b"`` → ``[Path('/src/a'), Path('/src/b')]``
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    pieces = (chunk.strip() for chunk in raw.split(";"))
+    return [Path(piece) for piece in pieces if piece]
+def _scan_and_store(task: tuple[Path, SessionStore, str]) -> None:
+    """Scan a single file and persist what was found.
+    *task* is a ``(file_path, store, session_id)`` tuple so the whole unit
+    of work travels as one argument and no closure is needed when it is
+    handed to an executor.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    target, session_store, run_id = task
+    found = scan_file(target)
+    session_store.insert_occurrences(run_id, found)
+def discover_files(path: Path, extensions: list[str]) -> list[Path]:
+    """Collect the files at or below *path* whose suffix is in *extensions*.
+    A directory is walked recursively and its files are returned sorted; a
+    plain file yields itself; anything else yields an empty list.  An empty
+    *extensions* list disables filtering.  Suffixes are compared lowercased,
+    and the filter also applies when *path* is a single file.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    if path.is_file():
+        found = [path]
+    elif path.is_dir():
+        found = [entry for entry in path.rglob("*") if entry.is_file()]
+        found.sort()
+    else:
+        return []
+    if extensions:
+        return [entry for entry in found if entry.suffix.lower() in extensions]
+    return found
+def _run_concurrent_scan(
+    files: list[Path],
+    store: SessionStore,
+    session_id: str,
+    workers: int,
+) -> None:
+    """Scan *files* on a thread pool and store the results under *session_id*.
+    A transient progress bar is drawn on the module-level rich console and
+    advanced once per finished file.  A file whose scan raises is logged as
+    a warning and does not stop the remaining work.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    columns = (
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        MofNCompleteColumn(),
+        TimeElapsedColumn(),
+    )
+    with Progress(*columns, console=_console, transient=True) as bar:
+        bar_task = bar.add_task("[cyan]Scanning\u2026", total=len(files))
+        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
+            # Map each future back to its file so failures can be attributed.
+            pending = {}
+            for source_file in files:
+                job = pool.submit(_scan_and_store, (source_file, store, session_id))
+                pending[job] = source_file
+            for done in concurrent.futures.as_completed(pending):
+                try:
+                    done.result()
+                except Exception as exc:  # pylint: disable=broad-exception-caught
+                    _logger.warning("Failed to scan %s: %s", pending[done], exc)
+                bar.advance(bar_task)
+@click.command()
+@click.argument("path")
+@click.option(
+    "--ext",
+    default="",
+    help=(
+        "Comma-separated file extensions to include "
+        "(e.g. py,java,js,ts). Omit to scan all files."
+    ),
+)
+@click.option(
+    "--output",
+    default="litscan-output",
+    help=(
+        "Base name (without extension) for the output file(s) "
+        "(default: litscan-output)."
+    ),
+)
+@click.option(
+    "--output-dir",
+    "output_dir",
+    default="reports",
+    type=click.Path(path_type=Path),
+    help=(
+        "Directory where the output file will be written "
+        "(default: reports). "
+        "The filename from --output is placed inside this directory."
+    ),
+)
+@click.option(
+    "--format",
+    "fmt",
+    default="json",
+    type=click.Choice(_VALID_FORMATS),
+    help="Output format: json, html, or all (default: json).",
+)
+@click.option(
+    "--workers",
+    # A thread pool needs at least one worker; reject 0 and negative values
+    # as a usage error instead of letting ThreadPoolExecutor raise ValueError.
+    type=click.IntRange(min=1),
+    default=min(32, (os.cpu_count() or 1) + 4),
+    help=(
+        "Number of parallel worker threads used to scan files "
+        "(default: min(32, cpu_count + 4))."
+    ),
+)
+@click.option(
+    "--db",
+    "db_path",
+    default=str(Path(tempfile.gettempdir()) / "litscan.db"),
+    type=click.Path(path_type=Path),
+    help=(
+        "Path to the SQLite scratch database used to store occurrences "
+        "during a scan run (default: <system-temp>/litscan.db). "
+        "Session records are removed after the report is written."
+    ),
+)
+def main(  # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals
+    path: str,
+    ext: str,
+    output: str,
+    output_dir: Path,
+    fmt: str,
+    workers: int,
+    db_path: Path,
+) -> None:
+    """Scan source files for string and numeric literals.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    # NOTE: the docstring above doubles as the command's --help text, so the
+    # implementation notes live in comments instead.
+    #
+    # path:       one or more files/directories, separated by ';'.
+    # ext:        comma-separated extension filter; empty means "all files".
+    # output:     base name of the report; only its stem is used.
+    # output_dir: directory that receives the report file(s).
+    # fmt:        "json", "html" or "all".
+    # workers:    size of the scanning thread pool (>= 1).
+    # db_path:    SQLite scratch database holding this run's occurrences.
+    _header = f"{_APP_NAME} v{__version__}"
+    _logger.info(_header)
+    _console.print(f"[bold]{_header}[/bold]")
+    extensions = _parse_extensions(ext) if ext else []
+    paths = _parse_paths(path)
+    # Preserve discovery order while dropping files reachable via several
+    # of the given paths.
+    seen: set[Path] = set()
+    files: list[Path] = []
+    with _console.status("[bold cyan]Discovering files\u2026", spinner="dots"):
+        for target_path in paths:
+            for found_file in discover_files(target_path, extensions):
+                if found_file not in seen:
+                    seen.add(found_file)
+                    files.append(found_file)
+    if not files:
+        _logger.info("No files found in %s", path)
+        _console.print("[yellow]No files found.[/yellow]")
+        return
+    _console.print(f"[bold]Scanning[/bold] {len(files)} file(s)\u2026")
+    # A fresh UUID isolates this run's rows inside the shared database.
+    session_id = str(uuid.uuid4())
+    store = SessionStore(db_path)
+    try:
+        _run_concurrent_scan(files, store, session_id, workers)
+        groups = store.read_groups(session_id)
+        stem = Path(output).stem
+        written = write_outputs(groups, output_dir, stem, fmt)
+    finally:
+        # Always remove this session's rows and release the connection,
+        # even when scanning or report writing failed.
+        store.delete_session(session_id)
+        store.close()
+    total = sum(g["count"] for g in groups)
+    _logger.info(
+        "Found %s literals (%s unique) -> %s",
+        total,
+        len(groups),
+        ", ".join(str(p) for p in written),
+    )
+    _console.print(
+        f"[bold green]\u2713[/bold green] "
+        f"[bold]{total}[/bold] literals "
+        f"([bold]{len(groups)}[/bold] unique) "
+        f"\u2192 {', '.join(str(p) for p in written)}"
+    )
+# Entry point when the module is executed directly; click fills in the
+# parameters of main() from the command line, hence the pylint suppression.
+if __name__ == "__main__":
+    main()  # pylint: disable=no-value-for-parameter

litscan-1.0.0/litscan/logging.ini ADDED Viewed

@@ -0,0 +1,28 @@
+[loggers]
+keys=root
+[handlers]
+keys=consoleHandler,fileHandler
+[formatters]
+keys=logFormatter,consoleFormatter
+[logger_root]
+level=INFO
+handlers=fileHandler
+[handler_consoleHandler]
+class=StreamHandler
+formatter=consoleFormatter
+args=(sys.stderr,)
+[handler_fileHandler]
+class=FileHandler
+formatter=logFormatter
+args=('litscan.log', 'a')
+[formatter_logFormatter]
+format=%(asctime)s [%(levelname)s] %(name)s - %(message)s
+[formatter_consoleFormatter]
+format=%(asctime)s - %(name)s - %(levelname)s - %(message)s

litscan-1.0.0/litscan/reporter.py ADDED Viewed

@@ -0,0 +1,356 @@
+"""Report generation for litscan: JSON and HTML output writers.
+Author: Ron Webb
+Since: 1.0.0
+"""
+from __future__ import annotations
+import html as _html
+import json
+from datetime import datetime
+from pathlib import Path
+from . import __version__
+from .scanner import LiteralGroup, ScanReport
+# strftime pattern for the "run date" stamped into every report.
+_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
+# Stylesheet embedded verbatim in the <style> element of the HTML report.
+# The doubled backslashes in the ``content`` rules produce CSS escapes
+# (e.g. ``\25b2``) for the sort-direction glyphs.
+_CSS = """
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body {
+    font-family: 'Segoe UI', Arial, sans-serif;
+    background: #f5f7fa;
+    color: #2c3e50;
+    padding: 2em;
+  }
+  header {
+    background: #1a3a5c;
+    color: #ffffff;
+    border-radius: 8px;
+    padding: 1.2em 1.8em;
+    margin-bottom: 1.5em;
+  }
+  header h1 { font-size: 1.6em; font-weight: 700; letter-spacing: 0.02em; }
+  header p  { font-size: 0.9em; margin-top: 0.3em; opacity: 0.85; }
+  .summary {
+    font-size: 0.9em;
+    color: #555;
+    margin-bottom: 1em;
+  }
+  .table-wrap { overflow-x: auto; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,.12); }
+  table {
+    border-collapse: collapse;
+    width: 100%;
+    background: #ffffff;
+    font-size: 0.88em;
+  }
+  thead tr { background: #1a3a5c; color: #ffffff; }
+  th {
+    padding: 0.75em 1em;
+    text-align: left;
+    font-weight: 600;
+    letter-spacing: 0.03em;
+    white-space: nowrap;
+  }
+  th.sortable { cursor: pointer; user-select: none; }
+  th.sortable:hover { background: #254e80; }
+  th.sortable .sort-icon { margin-left: 0.4em; font-size: 0.8em; }
+  th.sortable.asc .sort-icon::after { content: '\\25b2'; }
+  th.sortable.desc .sort-icon::after { content: '\\25bc'; }
+  th.sortable:not(.asc):not(.desc) .sort-icon::after { content: '\\21c5'; opacity: 0.6; }
+  .filter-row th { background: #1a3a5c; padding: 0.3em 1em 0.5em; }
+  .filter-row input {
+    width: 100%;
+    padding: 0.3em 0.5em;
+    border: 1px solid #3d6b9e;
+    border-radius: 4px;
+    background: #1e4575;
+    color: #fff;
+    font-size: 0.85em;
+    outline: none;
+  }
+  .filter-row input::placeholder { color: #aac4e8; }
+  .filter-row input:focus { border-color: #7eb3e8; background: #255085; }
+  td {
+    padding: 0.6em 1em;
+    border-bottom: 1px solid #e8ecf0;
+    vertical-align: top;
+  }
+  tbody tr.alt-row { background: #f0f4f8; }
+  tbody tr:hover { background: #dde9f7; }
+  td.row-num { text-align: right; color: #7f8c8d; font-variant-numeric: tabular-nums; width: 3em; }
+  td.count    { text-align: center; font-weight: 600; color: #1a3a5c; width: 5em; }
+  td.literal  code {
+    background: #eef2f7;
+    border-radius: 3px;
+    padding: 0.1em 0.4em;
+    font-family: 'Consolas', 'Courier New', monospace;
+    font-size: 0.95em;
+    word-break: break-all;
+  }
+  td.literal code .truncated {
+    color: #999;
+    font-style: italic;
+    cursor: help;
+    user-select: none;
+  }
+  td.locations { font-family: 'Consolas', 'Courier New', monospace; font-size: 0.82em; color: #555; word-break: break-all; }
+  footer { margin-top: 1.5em; font-size: 0.8em; color: #aaa; text-align: center; }
+"""
+def _write_json(groups: list[LiteralGroup], path: Path, run_date: str) -> None:
+    """Serialise *groups* into a JSON report and write it to *path*.
+    The document carries the application name, the package version, the
+    run date and the findings, indented by two spaces and encoded as UTF-8.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    document: ScanReport = {
+        "application": "litscan",
+        "version": __version__,
+        "run-date": run_date,
+        "findings": groups,
+    }
+    serialised = json.dumps(document, indent=2)
+    path.write_text(serialised, encoding="utf-8")
+# Ellipsis element appended when only the first line of a literal is shown.
+_TRUNCATED_MARKER = '<span class="truncated" title="Multiline literal \u2014 only first line shown">\u2026</span>'
+def _literal_display(literal: str) -> str:
+    """Render the first line of *literal* as HTML-escaped text.
+    When non-empty text follows the first newline, the truncation marker
+    is appended so the reader can tell the literal continues.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    head, *tail = literal.split("\n", 1)
+    shown = _html.escape(head, quote=False)
+    if tail and tail[0]:
+        return shown + _TRUNCATED_MARKER
+    return shown
+def _build_thead_html() -> str:
+    """Build the ``<thead>`` markup of the report table.
+    The first row holds the column titles (Literal and Count are clickable
+    and call ``litscanSortBy``); the second row holds the text inputs that
+    call ``litscanFilter`` on every keystroke.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    title_cells = (
+        "<th>#</th>"
+        '<th class="sortable" onclick="litscanSortBy(0)">Literal<span class="sort-icon"></span></th>'
+        '<th class="sortable" onclick="litscanSortBy(1)">Count<span class="sort-icon"></span></th>'
+        "<th>Locations</th>"
+    )
+    filter_cells = (
+        "<th></th>"
+        '<th><input type="text" id="filter-literal" placeholder="Filter literal\u2026"'
+        ' oninput="litscanFilter()"></th>'
+        '<th><input type="text" id="filter-count" placeholder="e.g. &gt;5"'
+        ' oninput="litscanFilter()"></th>'
+        "<th></th>"
+    )
+    fragments = [
+        "    <thead>\n",
+        "      <tr>",
+        title_cells,
+        "</tr>\n",
+        '      <tr class="filter-row">',
+        filter_cells,
+        "</tr>\n",
+        "    </thead>\n",
+    ]
+    return "".join(fragments)
+def _build_html_scaffold(
+    run_date: str, total: int, unique: int, rows_html: str, script: str
+) -> str:
+    """Assemble the full HTML document around the table rows and script.
+    The page consists of the embedded stylesheet, a header with the version
+    and run date, a summary line, the table (header from
+    :func:`_build_thead_html`, body from *rows_html*), a footer and finally
+    *script*.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    heading = f"LitScan {__version__} Report"
+    pieces = [
+        "<!DOCTYPE html>\n",
+        '<html lang="en">\n',
+        "<head>\n",
+        '  <meta charset="UTF-8">\n',
+        '  <meta name="viewport" content="width=device-width, initial-scale=1">\n',
+        f"  <title>{heading}</title>\n",
+        "  <style>\n",
+        _CSS,
+        "  </style>\n",
+        "</head>\n",
+        "<body>\n",
+        "  <header>\n",
+        f"    <h1>{heading}</h1>\n",
+        f"    <p>Date Run: {run_date}</p>\n",
+        "  </header>\n",
+        f'  <p class="summary">Found {total} literals &mdash; {unique} unique</p>\n',
+        '  <div class="table-wrap">\n',
+        "  <table>\n",
+        _build_thead_html(),
+        "    <tbody>\n",
+        f"{rows_html}\n",
+        "    </tbody>\n",
+        "  </table>\n",
+        "  </div>\n",
+        f"  <footer>Generated by LitScan {__version__}</footer>\n",
+        script,
+        "</body>\n",
+        "</html>\n",
+    ]
+    return "".join(pieces)
+def _build_html(groups: list[LiteralGroup], run_date: str) -> str:
+    """Build an HTML report string for the given literal groups.
+    Each group becomes one table row carrying ``data-literal`` and
+    ``data-count`` attributes; the embedded script reads those attributes
+    to sort and filter rows in the browser.  The rows and the script are
+    then wrapped into a full page by :func:`_build_html_scaffold`.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    # Summary figures: total occurrences and number of distinct literals.
+    total = sum(g["count"] for g in groups)
+    unique = len(groups)
+    rows: list[str] = []
+    for idx, group in enumerate(groups, start=1):
+        # Visible cell shows only the first line; the attribute keeps the
+        # whole literal (quote-escaped) so sorting/filtering sees all of it.
+        literal_display = _literal_display(group["literal"])
+        literal_attr = _html.escape(group["literal"], quote=True)
+        count = group["count"]
+        locations = "<br>".join(_html.escape(f, quote=False) for f in group["files"])
+        # Every second row gets the alternate background class.
+        alt = " alt-row" if idx % 2 == 0 else ""
+        rows.append(
+            f'      <tr class="data-row{alt}"'
+            f' data-idx="{idx}"'
+            f' data-literal="{literal_attr}"'
+            f' data-count="{count}">'
+            f'<td class="row-num">{idx}</td>'
+            f'<td class="literal"><code>{literal_display}</code></td>'
+            f'<td class="count">{count}</td>'
+            f'<td class="locations">{locations}</td>'
+            f"</tr>"
+        )
+    rows_html = "\n".join(rows)
+    # Client-side behaviour, wrapped in an IIFE; only the two handlers used
+    # by the table header are exported on ``window``.
+    script = (
+        "<script>\n"
+        "(function () {\n"
+        # sortCol: -1 = unsorted, 0 = literal, 1 = count; sortDir: 1 asc, -1 desc.
+        "  var sortCol = -1, sortDir = 1;\n"
+        "\n"
+        # Clicking the active column flips the direction; a new column starts ascending.
+        "  function sortBy(col) {\n"
+        "    if (sortCol === col) { sortDir = -sortDir; }\n"
+        "    else { sortCol = col; sortDir = 1; }\n"
+        "    applySort();\n"
+        "  }\n"
+        "\n"
+        # Literals compare case-insensitively as text, counts as integers.
+        "  function applySort() {\n"
+        "    var tbody = document.querySelector('tbody');\n"
+        "    var rows = Array.prototype.slice.call(tbody.querySelectorAll('tr'));\n"
+        "    rows.sort(function (a, b) {\n"
+        "      var va, vb;\n"
+        "      if (sortCol === 0) {\n"
+        "        va = a.dataset.literal.toLowerCase();\n"
+        "        vb = b.dataset.literal.toLowerCase();\n"
+        "      } else if (sortCol === 1) {\n"
+        "        va = parseInt(a.dataset.count, 10);\n"
+        "        vb = parseInt(b.dataset.count, 10);\n"
+        "      } else { return 0; }\n"
+        "      if (va < vb) return -sortDir;\n"
+        "      if (va > vb) return sortDir;\n"
+        "      return 0;\n"
+        "    });\n"
+        "    rows.forEach(function (r) { tbody.appendChild(r); });\n"
+        "    updateNumbers();\n"
+        "    restripe();\n"
+        "    updateSortIcons();\n"
+        "  }\n"
+        "\n"
+        # The index among the sortable headers matches the sortCol numbering.
+        "  function updateSortIcons() {\n"
+        "    var ths = document.querySelectorAll('thead tr:first-child th.sortable');\n"
+        "    ths.forEach(function (th, i) {\n"
+        "      th.classList.remove('asc', 'desc');\n"
+        "      if (i === sortCol) { th.classList.add(sortDir === 1 ? 'asc' : 'desc'); }\n"
+        "    });\n"
+        "  }\n"
+        "\n"
+        # Count filter syntax: optional operator (>=, <=, >, <, =) followed by
+        # digits; anything that does not parse matches every row.
+        "  function matchCount(count, filter) {\n"
+        "    var m = filter.match(/^(>=|<=|>|<|=)?(\\d+)$/);\n"
+        "    if (!m) return true;\n"
+        "    var op = m[1] || '=', val = parseInt(m[2], 10);\n"
+        "    if (op === '>') return count > val;\n"
+        "    if (op === '<') return count < val;\n"
+        "    if (op === '>=') return count >= val;\n"
+        "    if (op === '<=') return count <= val;\n"
+        "    return count === val;\n"
+        "  }\n"
+        "\n"
+        # A row stays visible only when it passes both filters.
+        "  function applyFilter() {\n"
+        "    var litFilter = document.getElementById('filter-literal').value.toLowerCase();\n"
+        "    var cntFilter = document.getElementById('filter-count').value.trim();\n"
+        "    var tbody = document.querySelector('tbody');\n"
+        "    tbody.querySelectorAll('tr').forEach(function (row) {\n"
+        "      var litMatch = !litFilter || row.dataset.literal.toLowerCase().indexOf(litFilter) !== -1;\n"
+        "      var cntMatch = !cntFilter || matchCount(parseInt(row.dataset.count, 10), cntFilter);\n"
+        "      row.style.display = (litMatch && cntMatch) ? '' : 'none';\n"
+        "    });\n"
+        "    updateNumbers();\n"
+        "    restripe();\n"
+        "  }\n"
+        "\n"
+        # Renumber the visible rows 1..n after sorting or filtering.
+        "  function updateNumbers() {\n"
+        "    var num = 1;\n"
+        "    document.querySelectorAll('tbody tr').forEach(function (row) {\n"
+        "      if (row.style.display !== 'none') {\n"
+        "        row.querySelector('.row-num').textContent = num++;\n"
+        "      }\n"
+        "    });\n"
+        "  }\n"
+        "\n"
+        # Re-apply zebra striping based on visible rows only.
+        "  function restripe() {\n"
+        "    var num = 0;\n"
+        "    document.querySelectorAll('tbody tr').forEach(function (row) {\n"
+        "      if (row.style.display !== 'none') {\n"
+        "        num++;\n"
+        "        row.classList.toggle('alt-row', num % 2 === 0);\n"
+        "      }\n"
+        "    });\n"
+        "  }\n"
+        "\n"
+        "  window.litscanSortBy = sortBy;\n"
+        "  window.litscanFilter = applyFilter;\n"
+        "})();\n"
+        "</script>\n"
+    )
+    return _build_html_scaffold(run_date, total, unique, rows_html, script)
+def _write_html(groups: list[LiteralGroup], path: Path, run_date: str) -> None:
+    """Render *groups* as an HTML report and write it to *path* as UTF-8.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    page = _build_html(groups, run_date)
+    path.write_text(page, encoding="utf-8")
+def write_outputs(
+    groups: list[LiteralGroup],
+    output_dir: Path,
+    stem: str,
+    fmt: str,
+) -> list[Path]:
+    """Write the report(s) selected by *fmt* into *output_dir*.
+    *output_dir* is created when missing.  ``"json"`` and ``"html"`` each
+    produce ``<stem>.<format>``; ``"all"`` produces both, JSON first.  All
+    files written in one call share the same run date.
+    Returns the paths that were written, in writing order.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    run_date = datetime.now().strftime(_DATE_FORMAT)
+    writers = (("json", _write_json), ("html", _write_html))
+    produced: list[Path] = []
+    for kind, writer in writers:
+        if fmt not in (kind, "all"):
+            continue
+        target = output_dir / f"{stem}.{kind}"
+        writer(groups, target, run_date)
+        produced.append(target)
+    return produced

litscan-1.0.0/litscan/scanner.py ADDED Viewed

@@ -0,0 +1,141 @@
+"""Literal scanner utilities.
+Author: Ron Webb
+Since: 1.0.0
+"""
+from __future__ import annotations
+import bisect
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TypedDict
+# Ordered alternation: the regex engine tries the branches left to right, so
+# triple-quoted blocks (which may span lines) come before single-line strings,
+# and decimal numbers come before integers so "1.5" is one match, not two.
+# Single-line strings may contain backslash escapes but no raw newline.
+_PATTERN = re.compile(
+    r'"""[\s\S]*?"""'
+    r"|'''[\s\S]*?'''"
+    r'|"(?:[^"\\\n]|\\.)*"'
+    r"|'(?:[^'\\\n]|\\.)*'"
+    r"|\b\d+\.\d+\b"
+    r"|\b\d+\b",
+)
+@dataclass(frozen=True)
+class LiteralOccurrence:
+    """Represents a discovered literal value in source code.
+    Instances are immutable (``frozen=True``).
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    # File the literal was found in.
+    file_path: Path
+    # 1-based line number of the first character of the match.
+    line: int
+    # 0-based column of the first character of the match.
+    column: int
+    # Matched text exactly as it appears in the source (quotes included).
+    value: str
+def _build_line_offsets(source: str) -> list[int]:
+    """Compute the character offset at which every line of *source* starts.
+    The first entry is always ``0``; each further entry is the offset just
+    past a newline character.  The list is sorted, which lets
+    :func:`_line_and_column` locate a match by binary search.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    line_starts = [0]
+    line_starts.extend(newline.end() for newline in re.finditer("\n", source))
+    return line_starts
+def _line_and_column(line_offsets: list[int], offset: int) -> tuple[int, int]:
+    """Translate a character *offset* into ``(line, column)``.
+    *line_offsets* is the sorted list produced by
+    :func:`_build_line_offsets`.  The line is 1-based, the column 0-based.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    # bisect_right counts the line starts at or before *offset*; the last of
+    # them belongs to the line that contains the offset.
+    start_index = bisect.bisect_right(line_offsets, offset) - 1
+    return start_index + 1, offset - line_offsets[start_index]
+def scan_literals(source: str, file_path: Path) -> list[LiteralOccurrence]:
+    """Find every string and numeric literal in *source*.
+    The scan is purely textual, so it works for any language or plain text:
+    - triple-quoted blocks (double or single quotes, may span lines),
+    - single-line strings in double or single quotes,
+    - decimal and integer numbers.
+    Occurrences are returned in the order they appear, each tagged with
+    *file_path* and the position of its first character.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    starts = _build_line_offsets(source)
+    found: list[LiteralOccurrence] = []
+    for hit in _PATTERN.finditer(source):
+        row, col = _line_and_column(starts, hit.start())
+        occurrence = LiteralOccurrence(
+            file_path=file_path, line=row, column=col, value=hit.group()
+        )
+        found.append(occurrence)
+    return found
+def scan_file(file_path: Path) -> list[LiteralOccurrence]:
+    """Load *file_path* and return the literals found in its text.
+    The file is decoded as UTF-8 with undecodable bytes replaced, then
+    passed to :func:`scan_literals`.  Bundling I/O and scanning in one
+    callable makes it easy to submit to a
+    :class:`concurrent.futures.Executor`.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    text = file_path.read_text(encoding="utf-8", errors="replace")
+    return scan_literals(text, file_path)
+class LiteralGroup(TypedDict):
+    """JSON-serialisable representation of a grouped literal.
+    One group describes a distinct literal together with how often it was
+    found and where.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    # Number of occurrences of the literal.
+    count: int
+    # Literal text as matched in the source.
+    literal: str
+    # Location strings for the occurrences -- presumably "path:line:col";
+    # confirm against the code that builds the groups.
+    files: list[str]
+# ScanReport uses the functional TypedDict syntax because "run-date" is not a
+# valid Python identifier.  Docstrings are not supported in this form, so the
+# keys are documented inline below.
+ScanReport = TypedDict(
+    "ScanReport",
+    {
+        # Name of the tool that produced the report.
+        "application": str,
+        # Version of the tool.
+        "version": str,
+        # Timestamp of the run, already formatted as text.
+        "run-date": str,
+        # Grouped literals that were found.
+        "findings": list[LiteralGroup],
+    },
+)

litscan-1.0.0/litscan/store.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""Session-scoped SQLite store for literal occurrences.
+Occurrences produced during parallel file scanning are written directly to an
+SQLite database instead of accumulated in memory.  Every scan run is assigned
+a UUID so multiple concurrent invocations share the same database file without
+interference.  After the report is written the caller deletes the session
+records, keeping the database lean.
+Author: Ron Webb
+Since: 1.0.0
+"""
+from __future__ import annotations
+import sqlite3
+import threading
+from pathlib import Path
+from .scanner import LiteralGroup, LiteralOccurrence
+# Separator placed between location strings by GROUP_CONCAT.  The reader
+# splits the aggregated text on it, so a file path that itself contains this
+# sequence would be split as well; it is chosen to be unlikely in real paths.
+_LOC_SEP = "|||"
+# One row per literal occurrence; session_id tags the run it belongs to.
+_CREATE_TABLE = (
+    "CREATE TABLE IF NOT EXISTS occurrences ("
+    "    session_id TEXT NOT NULL,"
+    "    file_path  TEXT NOT NULL,"
+    "    line       INTEGER NOT NULL,"
+    "    col        INTEGER NOT NULL,"
+    "    value      TEXT NOT NULL"
+    ")"
+)
+# Reads and deletes always filter on session_id.
+_CREATE_INDEX = "CREATE INDEX IF NOT EXISTS idx_session ON occurrences (session_id)"
+_INSERT = (
+    "INSERT INTO occurrences (session_id, file_path, line, col, value)"
+    " VALUES (?, ?, ?, ?, ?)"
+)
+# Groups a session's rows by literal text, most frequent first, and joins
+# each group's "path:line:col" locations.  The separator is interpolated
+# from _LOC_SEP so the query and the Python-side split cannot drift apart.
+_SELECT_GROUPS = (
+    "SELECT value, COUNT(*) AS cnt,"
+    f" GROUP_CONCAT(file_path || ':' || line || ':' || col, '{_LOC_SEP}')"
+    " FROM occurrences WHERE session_id = ?"
+    " GROUP BY value"
+    " ORDER BY cnt DESC, value ASC"
+)
+_DELETE_SESSION = "DELETE FROM occurrences WHERE session_id = ?"
+class SessionStore:
+    """SQLite-backed session store for literal occurrences.
+    One connection is opened per instance and shared by all worker threads;
+    a lock serialises every call made on it.  Rows are tagged with a
+    *session_id* (UUID string) so several runs can use the same database
+    file without seeing each other's records.  Grouping and counting are
+    performed in SQL.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    def __init__(self, db_path: Path) -> None:
+        """Open (or create) the SQLite database at *db_path*.
+        WAL journal mode and ``synchronous=NORMAL`` are requested, and the
+        table and index are created when missing.  The connection is opened
+        with ``check_same_thread=False`` because worker threads call into
+        it; the lock keeps those calls from overlapping.
+        Raises :class:`sqlite3.Error` when set-up fails; the connection is
+        closed before the error propagates.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        self._conn = sqlite3.connect(db_path, check_same_thread=False)
+        self._lock = threading.Lock()
+        try:
+            with self._lock:
+                self._conn.execute("PRAGMA journal_mode=WAL")
+                self._conn.execute("PRAGMA synchronous=NORMAL")
+                self._conn.execute(_CREATE_TABLE)
+                self._conn.execute(_CREATE_INDEX)
+                self._conn.commit()
+        except sqlite3.Error:
+            # Do not leak the open connection when initialisation fails.
+            self._conn.close()
+            raise
+    def insert_occurrences(
+        self, session_id: str, occurrences: list[LiteralOccurrence]
+    ) -> None:
+        """Persist *occurrences* for *session_id* in the database.
+        The batch is written in one transaction: it is committed as a whole
+        or rolled back when an error occurs.  An empty batch is a no-op.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        rows = [
+            (session_id, str(o.file_path), o.line, o.column, o.value)
+            for o in occurrences
+        ]
+        if not rows:
+            # Nothing to write; avoid taking the lock and an empty commit.
+            return
+        # The connection context manager commits on success and rolls back
+        # on error, so a failed batch cannot be committed by a later call.
+        with self._lock, self._conn:
+            self._conn.executemany(_INSERT, rows)
+    def read_groups(self, session_id: str) -> list[LiteralGroup]:
+        """Return grouped literals for *session_id*, aggregated in SQL.
+        Groups are ordered by descending count, then by literal text.
+        Grouping and counting happen inside SQLite, so Python never builds
+        one object per raw occurrence; note however that each group's
+        ``files`` list still holds one location string per occurrence.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        with self._lock:
+            rows = self._conn.execute(_SELECT_GROUPS, (session_id,)).fetchall()
+        return [
+            LiteralGroup(
+                count=row[1],
+                literal=row[0],
+                files=row[2].split(_LOC_SEP) if row[2] else [],
+            )
+            for row in rows
+        ]
+    def delete_session(self, session_id: str) -> None:
+        """Remove all occurrences belonging to *session_id* from the database.
+        The deletion is committed on success and rolled back on error.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        with self._lock, self._conn:
+            self._conn.execute(_DELETE_SESSION, (session_id,))
+    def close(self) -> None:
+        """Close the underlying database connection.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        with self._lock:
+            self._conn.close()
+    def __enter__(self) -> SessionStore:
+        """Return *self* to support use as a context manager.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        return self
+    def __exit__(self, *_: object) -> None:
+        """Close the database connection on context manager exit.
+        Author: Ron Webb
+        Since: 1.0.0
+        """
+        self.close()

litscan-1.0.0/litscan/util.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""
+Utility helpers for litscan.
+Provides :func:`setup_logger` for consistent logging configuration.
+Author: Ron Webb
+Since: 1.0.0
+"""
+from __future__ import annotations
+import importlib.resources
+import logging
+import logging.config
+import os
+import shutil
+from pathlib import Path
+def _load_config(config_path: str) -> None:
+    """
+    Configure logging from the ini file at *config_path* using
+    :func:`logging.config.fileConfig`, leaving existing loggers enabled.
+    If loading raises, :func:`logging.basicConfig` is applied at INFO level
+    and the failure is logged together with its traceback.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    try:
+        logging.config.fileConfig(config_path, disable_existing_loggers=False)
+    except Exception:  # pylint: disable=broad-exception-caught
+        logging.basicConfig(level=logging.INFO)
+        logging.error(
+            "Failed to load logging config from %s. Using basic configuration.",
+            config_path,
+            exc_info=True,
+        )
+def _load_packaged_config() -> None:
+    """
+    Configure logging from the ``logging.ini`` that ships inside the
+    ``litscan`` package.  The resource is located with
+    :mod:`importlib.resources` and exposed as a real file path for the
+    duration of the load.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    bundled = importlib.resources.files("litscan").joinpath("logging.ini")
+    with importlib.resources.as_file(bundled) as ini_file:
+        _load_config(str(ini_file))
+def _ensure_config_dir(config_dir: Path) -> Path:
+    """
+    Make sure *config_dir* exists and contains a ``logging.ini``.
+    The directory (including parents) is created when missing.  An existing
+    ``logging.ini`` is left untouched; otherwise the packaged one is copied
+    in.  Returns the path to ``logging.ini`` inside *config_dir*.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    config_dir.mkdir(parents=True, exist_ok=True)
+    ini_path = config_dir / "logging.ini"
+    if ini_path.exists():
+        return ini_path
+    bundled = importlib.resources.files("litscan").joinpath("logging.ini")
+    with importlib.resources.as_file(bundled) as packaged_ini:
+        shutil.copy2(packaged_ini, ini_path)
+    return ini_path
+def setup_logger(name: str) -> logging.Logger:
+    """
+    Configure logging and return the logger called *name*.
+    Where ``logging.ini`` is taken from:
+    1. ``LITSCAN_CONFIG_DIR`` environment variable — when set to a non-empty
+       value, that directory is created if necessary, seeded with the
+       packaged ``logging.ini`` when it has none, and the file is loaded
+       from there.
+    2. Otherwise the ``logging.ini`` bundled inside the ``litscan`` package
+       is loaded directly via :mod:`importlib.resources`.
+    Author: Ron Webb
+    Since: 1.0.0
+    """
+    override_dir = os.environ.get("LITSCAN_CONFIG_DIR")
+    if not override_dir:
+        _load_packaged_config()
+    else:
+        ini_path = _ensure_config_dir(Path(override_dir))
+        _load_config(str(ini_path))
+    return logging.getLogger(name)

litscan-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,32 @@
+[project]
+name = "litscan"
+version = "1.0.0"
+description = "A small CLI tool that scans a codebase for string and numeric literals, helping you quickly spot hard-coded values in source files."
+authors = [
+    {name = "Ron Webb",email = "ron@ronella.xyz"}
+]
+requires-python = ">=3.14"
+dependencies = [
+    "rich (>=15.0.0,<16.0.0)",
+    "click (>=8.0.0,<9.0.0)"
+]
+[project.scripts]
+litscan = "litscan.cli:main"
+[tool.poetry]
+packages = [{include = "litscan"}]
+include = ["litscan/logging.ini"]
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
+[dependency-groups]
+dev = [
+    "black (>=26.5.1,<27.0.0)",
+    "pylint (>=4.0.5,<5.0.0)",
+    "pytest (>=9.0.3,<10.0.0)",
+    "pytest-cov (>=7.1.0,<8.0.0)"
+]