llm-code-validator 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+
10
# Repository root, assuming this module lives one directory below it
# (e.g. <repo>/llm_code_validator/signatures.py) -- TODO confirm layout.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Copy of the rule database shipped alongside this module in the wheel.
PACKAGE_SIGNATURE_PATH = Path(__file__).with_name("library_signatures.json")
# Source-tree copy of the rule database, used when running from a checkout.
REPO_SIGNATURE_PATH = REPO_ROOT / "data" / "library_signatures.json"
# Prefer the packaged database; fall back to the repository copy.
DEFAULT_SIGNATURE_PATH = PACKAGE_SIGNATURE_PATH if PACKAGE_SIGNATURE_PATH.exists() else REPO_SIGNATURE_PATH
14
+
15
+
16
@dataclass(frozen=True)
class SignatureRule:
    """One API-drift rule materialized from the signature database."""

    library: str  # library/distribution name, e.g. "pandas"
    symbol: str  # display symbol for diagnostics, e.g. "DataFrame.append"
    message: str  # fully formatted human-readable diagnostic
    version_assumption: str  # version constraint the rule applies to, e.g. "pandas>=2.0"
    severity: str  # "error" (symbol removed) or "warning" (behavior changed)
    replacement: str | None  # suggested replacement code/import, if known
    fix_safety: str  # one of "safe_fix" | "suggested_fix" | "no_fix"
    evidence: str  # source URL or note backing the rule; "" when absent
    match_names: tuple[str, ...]  # names this rule matches in analyzed code
    match_exact_only: bool = False  # True: match the full symbol only, no attribute-suffix match
    required_keywords: tuple[str, ...] = ()  # keyword args that must be present for a match
29
+
30
+
31
class SignatureValidationError(ValueError):
    """Error type for problems in the signature rule database."""

    pass
33
+
34
+
35
def _rule_from_entry(library: str, symbol: str, entry: dict[str, Any], current_version: str) -> SignatureRule | None:
    """Build a SignatureRule from one raw database entry.

    Returns None when the entry describes no issue (the symbol still
    exists, was not moved, and has no recorded behavior change).
    """
    # An entry is a problem if the symbol is gone, was moved from an old
    # module, or changed behavior in some release.
    has_issue = not entry.get("exists", True) or "module_old" in entry or "changed_in" in entry
    if not has_issue:
        return None

    # Version at which the drift happened; fall back to the library's
    # current version when the entry does not record one.
    removed_or_changed = entry.get("removed_in") or entry.get("changed_in") or current_version
    version_assumption = f"{library}>={removed_or_changed}" if removed_or_changed else f"{library} current"
    # First non-empty explanation wins; generic text as last resort.
    reason = entry.get("reason") or entry.get("note") or entry.get("common_mistake") or "Known API drift pattern."
    replacement = entry.get("replacement") or entry.get("new_import") or entry.get("correct_usage") or entry.get("new_usage")
    safety = entry.get("fix_safety") or ("suggested_fix" if replacement else "no_fix")
    # Removed symbols are hard errors; behavioral changes are warnings.
    severity = "error" if not entry.get("exists", True) else "warning"
    evidence = entry.get("source_url") or entry.get("source_note") or entry.get("release_note")
    display_symbol = symbol
    # Special-case two pandas DataFrame methods so diagnostics read
    # "DataFrame.append" rather than a bare "append".
    if symbol in {"append", "mad"} and library == "pandas":
        display_symbol = f"DataFrame.{symbol}"

    exact_only = bool(entry.get("match_exact_only", False))
    configured_match_names = entry.get("match_names", [])
    old_import = entry.get("old_import")
    # Exact-only rules start from the explicitly configured names;
    # everything else starts from the symbol and its last attribute.
    if exact_only and old_import and " import " in old_import:
        match_names = set(configured_match_names)
    elif exact_only and configured_match_names:
        match_names = set(configured_match_names)
    else:
        match_names = {symbol, symbol.split(".")[-1]}
    # Derive extra match names from a "from X import Y" style old import.
    # NOTE(review): nesting of the following blocks reconstructed from the
    # internal exact_only guards -- confirm against the original source.
    if old_import and " import " in old_import:
        import_line = old_import.splitlines()[0]
        module, name = import_line.removeprefix("from ").rsplit(" import ", 1)
        if not exact_only:
            match_names.add(name.strip())
            match_names.add(f"{module.strip()}.{name.strip()}")
    # Derive a callable token from the recorded old usage, e.g.
    # "df.append(...)" -> "df.append".
    old_usage = entry.get("old_usage") or entry.get("old_import")
    if old_usage and not (exact_only and old_import and " import " in old_import):
        token = old_usage.split("(", 1)[0].split()[-1]
        match_names.add(token)
    # Configured names always apply, whatever branch ran above.
    for extra_name in configured_match_names:
        match_names.add(extra_name)

    qualified_symbol = display_symbol if display_symbol.startswith(f"{library}.") else f"{library}.{display_symbol}"

    return SignatureRule(
        library=library,
        symbol=display_symbol,
        message=f"{qualified_symbol} is incompatible with {version_assumption}: {reason}",
        version_assumption=version_assumption,
        severity=severity,
        replacement=replacement,
        fix_safety=safety,
        evidence=evidence or "",
        # Sorted tuple keeps rule equality/hashing deterministic.
        match_names=tuple(sorted(name for name in match_names if name)),
        match_exact_only=exact_only,
        required_keywords=tuple(sorted(entry.get("required_keywords", []))),
    )
88
+
89
+
90
@lru_cache(maxsize=1)
def load_signatures(path: str | None = None) -> dict[str, list[SignatureRule]]:
    """Load the signature database and group rules by library name.

    NOTE(review): the cache holds a single entry, so alternating calls
    with different paths re-read the file each time, and callers share
    (and must not mutate) the returned dict.
    """
    signature_path = Path(path) if path else DEFAULT_SIGNATURE_PATH
    raw = json.loads(signature_path.read_text(encoding="utf-8"))

    rules: dict[str, list[SignatureRule]] = {}
    for library, library_data in raw.items():
        version = library_data.get("current_version", "current")
        for symbol, entry in library_data.get("methods", {}).items():
            rule = _rule_from_entry(library, symbol, entry, version)
            if rule is not None:
                rules.setdefault(library, []).append(rule)
    return rules
104
+
105
+
106
def validate_signature_database(path: str | None = None, require_official_evidence: bool = False) -> list[str]:
    """Check the signature database for structural problems.

    Returns a list of human-readable error strings; empty means valid.
    With require_official_evidence=True, every rule must cite a
    source_url or release_note rather than an informal note.
    """
    signature_path = Path(path) if path else DEFAULT_SIGNATURE_PATH
    raw = json.loads(signature_path.read_text(encoding="utf-8"))

    errors: list[str] = []
    seen: set[tuple[str, str]] = set()
    for library, library_data in raw.items():
        if not isinstance(library_data, dict):
            errors.append(f"{library}: library entry must be an object")
            continue
        methods = library_data.get("methods")
        if not isinstance(methods, dict):
            errors.append(f"{library}: missing methods object")
            continue
        current_version = library_data.get("current_version", "current")
        for symbol, entry in methods.items():
            key = (library, symbol)
            if key in seen:
                errors.append(f"{library}.{symbol}: duplicate rule")
            seen.add(key)
            if not isinstance(entry, dict):
                errors.append(f"{library}.{symbol}: rule must be an object")
                continue
            rule = _rule_from_entry(library, symbol, entry, current_version)
            if rule is None:
                # Entry describes no drift issue; nothing to validate.
                continue
            if not rule.evidence:
                errors.append(f"{library}.{symbol}: missing evidence")
            has_official = entry.get("source_url") or entry.get("release_note")
            if require_official_evidence and rule.evidence and not has_official:
                errors.append(f"{library}.{symbol}: production rules require source_url or release_note")
            if rule.fix_safety == "safe_fix" and not rule.replacement:
                errors.append(f"{library}.{symbol}: safe_fix requires replacement")
            if rule.fix_safety not in {"safe_fix", "suggested_fix", "no_fix"}:
                errors.append(f"{library}.{symbol}: invalid fix safety {rule.fix_safety}")
    return errors
143
+
144
+
145
def find_rule(
    library: str,
    symbol: str,
    rules: dict[str, list[SignatureRule]] | None = None,
    keywords: set[str] | None = None,
) -> SignatureRule | None:
    """Return the first rule for *library* that matches *symbol*, or None.

    *symbol* may be dotted (e.g. "df.append"); non-exact rules match
    either the full symbol or its final attribute.  *keywords* are the
    keyword arguments observed at the call site, used to gate rules that
    only apply with particular keywords.
    """
    # Bug fix: the previous `rules or load_signatures()` treated an
    # explicitly passed EMPTY dict as "not given" and silently fell back
    # to loading the full default database.  Only an omitted argument
    # should trigger the fallback.
    if rules is None:
        rules = load_signatures()
    if keywords is None:
        keywords = set()
    attr = symbol.split(".")[-1]
    candidates = {symbol, attr}
    for rule in rules.get(library, []):
        # Skip rules whose required keywords are absent at the call site.
        if rule.required_keywords and not keywords.intersection(rule.required_keywords):
            continue
        rule_names = set(rule.match_names)
        if rule.match_exact_only and symbol in rule_names:
            return rule
        if not rule.match_exact_only and candidates.intersection(rule_names):
            return rule
    return None
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import tomllib
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+
9
# Dependency files probed by discover_requirements(), in priority order.
DEFAULT_REQUIREMENTS_FILENAMES = ("requirements.txt", "pyproject.toml", "poetry.lock", "uv.lock", "Pipfile.lock")
# Matches "name", optional comparison operator, optional version,
# e.g. "pandas>=2.0" -> ("pandas", ">=", "2.0").
_REQ_PATTERN = re.compile(r"^\s*([A-Za-z0-9_.-]+)\s*([<>=!~]=?|===)?\s*([^\s;#]+)?")
# NOTE(review): the two lock-file patterns below appear unused here --
# TOML lock files are parsed with tomllib in parse_toml_lock(); confirm
# before removing.
_LOCK_NAME_PATTERN = re.compile(r'^\s*name\s*=\s*["\']([^"\']+)["\']')
_LOCK_VERSION_PATTERN = re.compile(r'^\s*version\s*=\s*["\']([^"\']+)["\']')
13
+
14
+
15
@dataclass(frozen=True)
class VersionContext:
    """Resolved dependency pins for the project under analysis."""

    requirements_path: str | None  # file the pins came from; None when none found
    dependencies: dict[str, str]  # normalized name -> version spec, e.g. "==2.1.0"
    python_version: str | None = None  # interpreter version hint, if supplied
    used_defaults: bool = False  # True when no dependency file was discovered

    def assumption_for(self, library: str, fallback: str) -> str:
        """Return "<library><pin>" when *library* is pinned, else *fallback*.

        The dependency map is keyed by normalized names (lowercase, with
        "_" replaced by "-", as produced by the parsers in this module),
        so the lookup must normalize the same way.  Bug fix: previously
        only .lower() was applied, so libraries such as
        "typing_extensions" never matched their recorded pins.
        """
        pinned = self.dependencies.get(library.replace("_", "-").lower())
        if pinned:
            return f"{library}{pinned}"
        return fallback
27
+
28
+
29
def parse_requirements(path: str | Path) -> dict[str, str]:
    """Parse a requirements.txt-style file into {normalized name: spec}."""
    dependencies: dict[str, str] = {}
    for raw_line in _read_text_lenient(Path(path)).splitlines():
        stripped = raw_line.strip()
        # Skip blanks, comments, and pip options such as "-r other.txt".
        if not stripped or stripped[0] in "#-":
            continue
        match = _REQ_PATTERN.match(stripped)
        if match is None:
            continue
        name, operator, version = match.groups()
        key = name.replace("_", "-").lower()
        dependencies[key] = (operator or "") + (version or "")
    return dependencies
43
+
44
+
45
+ def _read_text_lenient(path: Path) -> str:
46
+ try:
47
+ return path.read_text(encoding="utf-8")
48
+ except UnicodeDecodeError:
49
+ return path.read_text(encoding="utf-8-sig", errors="ignore")
50
+
51
+
52
+ def _normalize_name(name: str) -> str:
53
+ return name.replace("_", "-").lower()
54
+
55
+
56
def _parse_requirement_string(requirement: str) -> tuple[str, str] | None:
    """Split one requirement string into (normalized name, spec), or None."""
    match = _REQ_PATTERN.match(requirement)
    if match is None:
        return None
    name, operator, version = match.groups()
    spec = (operator or "") + (version or "")
    return _normalize_name(name), spec
62
+
63
+
64
def parse_pyproject(path: str | Path) -> dict[str, str]:
    """Extract dependency pins from pyproject.toml (PEP 621 and Poetry)."""
    data = tomllib.loads(_read_text_lenient(Path(path)))
    dependencies: dict[str, str] = {}
    # PEP 621 [project].dependencies: a list of requirement strings.
    for requirement in data.get("project", {}).get("dependencies", []) or []:
        parsed = _parse_requirement_string(requirement)
        if parsed is not None:
            name, spec = parsed
            dependencies[name] = spec
    # Poetry [tool.poetry.dependencies]: name -> spec string or table.
    for name, value in data.get("tool", {}).get("poetry", {}).get("dependencies", {}).items():
        if name.lower() == "python":
            continue  # the interpreter pin is not a library dependency
        if isinstance(value, str):
            dependencies[_normalize_name(name)] = value
        elif isinstance(value, dict):
            dependencies[_normalize_name(name)] = str(value.get("version", ""))
    return dependencies
81
+
82
+
83
def parse_toml_lock(path: str | Path) -> dict[str, str]:
    """Read pinned versions from a poetry.lock / uv.lock TOML file."""
    data = tomllib.loads(_read_text_lenient(Path(path)))
    packages = data.get("package", [])
    # Some lock formats key packages by name instead of listing them.
    if isinstance(packages, dict):
        packages = list(packages.values())
    dependencies: dict[str, str] = {}
    for package in packages:
        if not isinstance(package, dict):
            continue
        name = package.get("name")
        version = package.get("version")
        if name and version:
            # Lock files record exact versions, so pin with "==".
            dependencies[_normalize_name(str(name))] = f"=={version}"
    return dependencies
97
+
98
+
99
def parse_pipfile_lock(path: str | Path) -> dict[str, str]:
    """Read pinned versions from a Pipfile.lock (JSON) file."""
    import json

    data = json.loads(_read_text_lenient(Path(path)))
    dependencies: dict[str, str] = {}
    # Both runtime ("default") and dev ("develop") sections are collected.
    for section in ("default", "develop"):
        for name, value in data.get(section, {}).items():
            if isinstance(value, str):
                spec = value
            elif isinstance(value, dict):
                spec = str(value.get("version", ""))
            else:
                continue  # unknown entry shape; skip, as before
            dependencies[_normalize_name(name)] = spec
    return dependencies
111
+
112
+
113
def parse_dependency_file(path: str | Path) -> dict[str, str]:
    """Dispatch to the right parser based on the dependency file's name.

    Unrecognized filenames fall back to requirements.txt-style parsing.
    """
    dep_path = Path(path)
    parsers = {
        "requirements.txt": parse_requirements,
        "pyproject.toml": parse_pyproject,
        "poetry.lock": parse_toml_lock,
        "uv.lock": parse_toml_lock,
        "Pipfile.lock": parse_pipfile_lock,
    }
    parser = parsers.get(dep_path.name, parse_requirements)
    return parser(dep_path)
124
+
125
+
126
def discover_requirements(start: str | Path = ".") -> Path | None:
    """Return the first known dependency file next to *start*, or None.

    A file path is treated as its containing directory; candidates are
    checked in DEFAULT_REQUIREMENTS_FILENAMES priority order.
    """
    root = Path(start)
    if root.is_file():
        root = root.parent
    return next(
        (root / name for name in DEFAULT_REQUIREMENTS_FILENAMES if (root / name).exists()),
        None,
    )
135
+
136
+
137
def build_version_context(
    paths: list[str] | None = None,
    requirements: str | None = None,
    python_version: str | None = None,
) -> VersionContext:
    """Build a VersionContext from an explicit file or by discovery.

    Priority: an explicit *requirements* path wins; otherwise the search
    starts next to the first analyzed path (or the current directory).
    When nothing is found, an empty context with used_defaults=True is
    returned.
    """
    if requirements:
        explicit = Path(requirements)
        return VersionContext(str(explicit), parse_dependency_file(explicit), python_version, used_defaults=False)

    search_root = "."
    if paths:
        first = Path(paths[0])
        search_root = str(first.parent if first.is_file() else first)
    found = discover_requirements(search_root)
    if found is None:
        return VersionContext(None, {}, python_version, used_defaults=True)
    return VersionContext(str(found), parse_dependency_file(found), python_version, used_defaults=False)
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-code-validator
3
+ Version: 0.1.0
4
+ Summary: CLI guardrail for catching stale Python APIs before runtime.
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=8; extra == "dev"
10
+ Dynamic: license-file
11
+
12
+ # llm-code-validator
13
+
14
+ Python CLI for detecting stale or version-incompatible third-party API usage in Python source code.
15
+
16
+ ## Installation
17
+
18
+ Install from the repository:
19
+
20
+ ```bash
21
+ git clone https://github.com/mathew-felix/llm-code-validator
22
+ cd llm-code-validator
23
+ pip install -e .
24
+ ```
25
+
26
+ Install with test dependencies:
27
+
28
+ ```bash
29
+ pip install -e ".[dev]"
30
+ ```
31
+
32
+ After the package is published to PyPI:
33
+
34
+ ```bash
35
+ pip install llm-code-validator
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ Check one file:
41
+
42
+ ```bash
43
+ llm-code-validator check file.py
44
+ ```
45
+
46
+ Check a directory:
47
+
48
+ ```bash
49
+ llm-code-validator check src/
50
+ ```
51
+
52
+ Check standard input:
53
+
54
+ ```bash
55
+ llm-code-validator check - < snippet.py
56
+ ```
57
+
58
+ Check staged Git files:
59
+
60
+ ```bash
61
+ llm-code-validator check --staged
62
+ ```
63
+
64
+ Use an explicit dependency file:
65
+
66
+ ```bash
67
+ llm-code-validator check --requirements requirements.txt src/
68
+ ```
69
+
70
+ Show low-confidence diagnostics:
71
+
72
+ ```bash
73
+ llm-code-validator check --show-low-confidence src/
74
+ ```
75
+
76
+ Exit codes:
77
+
78
+ - `0`: no diagnostics
79
+ - `1`: diagnostics found
80
+ - `2`: tool error
81
+
82
+ ## Output
83
+
84
+ Text output:
85
+
86
+ ```bash
87
+ llm-code-validator check src/
88
+ ```
89
+
90
+ JSON output:
91
+
92
+ ```bash
93
+ llm-code-validator check src/ --format json
94
+ ```
95
+
96
+ GitHub Actions annotation output:
97
+
98
+ ```bash
99
+ llm-code-validator check src/ --format github
100
+ ```
101
+
102
+ ## Fixes
103
+
104
+ Preview fixes:
105
+
106
+ ```bash
107
+ llm-code-validator fix file.py
108
+ ```
109
+
110
+ Apply safe fixes:
111
+
112
+ ```bash
113
+ llm-code-validator fix file.py --write
114
+ ```
115
+
116
+ Only rules marked `safe_fix` are written. Rules marked `suggested_fix` or `no_fix` are reported but not changed.
117
+
118
+ Current rule safety counts:
119
+
120
+ - `safe_fix`: 15 rules
121
+ - `suggested_fix`: 51 rules
122
+ - `no_fix`: 2 rules
123
+
124
+ ## Signature Database
125
+
126
+ Validate the rule database:
127
+
128
+ ```bash
129
+ llm-code-validator validate-signatures
130
+ ```
131
+
132
+ The source rule database is:
133
+
134
+ ```text
135
+ data/library_signatures.json
136
+ ```
137
+
138
+ The packaged rule database is:
139
+
140
+ ```text
141
+ llm_code_validator/library_signatures.json
142
+ ```
143
+
144
+ Current rule count:
145
+
146
+ - 68 API-drift rules
147
+
148
+ ## Benchmarks
149
+
150
+ Run the CLI benchmark dataset:
151
+
152
+ ```bash
153
+ python -m llm_code_validator.benchmark --dataset validation_dataset/cli_benchmark_cases.json
154
+ ```
155
+
156
+ Run the AI-stack benchmark dataset:
157
+
158
+ ```bash
159
+ python -m llm_code_validator.benchmark --dataset validation_dataset/ai_stack_benchmark_cases.json
160
+ ```
161
+
162
+ Current saved benchmark results:
163
+
164
+ - CLI dataset: precision `1.0`, recall `1.0`, p50 `0.243ms`, p95 `6.199ms`
165
+ - AI-stack dataset: precision `1.0`, recall `1.0`, p50 `0.444ms`, p95 `4.939ms`
166
+
167
+ ## Pre-Commit
168
+
169
+ `.pre-commit-hooks.yaml` is included:
170
+
171
+ ```yaml
172
+ repos:
173
+ - repo: https://github.com/mathew-felix/llm-code-validator
174
+ rev: v0.1.0
175
+ hooks:
176
+ - id: llm-code-validator
177
+ ```
178
+
179
+ ## GitHub Actions
180
+
181
+ Example workflow:
182
+
183
+ ```yaml
184
+ name: API Drift Check
185
+
186
+ on:
187
+ pull_request:
188
+
189
+ jobs:
190
+ api-drift:
191
+ runs-on: ubuntu-latest
192
+ steps:
193
+ - uses: actions/checkout@v4
194
+ - uses: actions/setup-python@v5
195
+ with:
196
+ python-version: "3.11"
197
+ - run: pip install llm-code-validator
198
+ - run: llm-code-validator check . --format github
199
+ ```
200
+
201
+ ## Testing
202
+
203
+ Run the test suite:
204
+
205
+ ```bash
206
+ pytest -q
207
+ ```
208
+
209
+ Current local result:
210
+
211
+ ```text
212
+ 72 passed
213
+ ```
214
+
215
+ ## Documentation
216
+
217
+ - `docs/demo.md`: example check and fix workflow
218
+ - `docs/rules.md`: rule database notes
219
+ - `docs/release.md`: package release steps
220
+ - `PROJECT_REPORT.md`: project report
@@ -0,0 +1,16 @@
1
+ llm_code_validator/__init__.py,sha256=b1P-a04qNBDE4gl9pG8j5x3ko8ZnOVbVgxeF1DN2F5I,85
2
+ llm_code_validator/benchmark.py,sha256=Ga22OpwehVKMUZ1tF39WKEx-knjlfKwxUPHgxSuSY3o,5554
3
+ llm_code_validator/cli.py,sha256=y6Gyo-ttcjxFfIddp-nYp-RPC3iFKW2I0I-gspJMv1k,4599
4
+ llm_code_validator/core.py,sha256=BEHRc4FKPMfPsLKBhZeqBxG-YoH2fSS64-aIoHGeTQk,12298
5
+ llm_code_validator/diagnostics.py,sha256=K1PJ8qBevv5wWqNgYcr-vj2iuCjPRsEH4YTJewnNG8M,1667
6
+ llm_code_validator/fixes.py,sha256=hpro-qODrBmsGh9wH79vg6MbTXasA7F0OjASiKnDZIs,2348
7
+ llm_code_validator/formatting.py,sha256=8UYMH6AOlROVoyRMx8fkcyJuyMjXNPKjZFl-DJuEWf4,1519
8
+ llm_code_validator/library_signatures.json,sha256=QkSt15EvhHjrK358O1IeVtbVgIo09V0sj9Qe97IYVK8,45559
9
+ llm_code_validator/signatures.py,sha256=Bf47pk5Pesn_6caKLLRJDdLCwcVzWzZ6Z_jyyuHsZL4,6951
10
+ llm_code_validator/versioning.py,sha256=xQBgZca3ITCJDTQxwKLu60NaVYHE4o2fkM5fspy8BdQ,5443
11
+ llm_code_validator-0.1.0.dist-info/licenses/LICENSE,sha256=8pmblFexDdMWS6-UCM8BJ6XrLtZfaT23oLzHzg9MVsk,1090
12
+ llm_code_validator-0.1.0.dist-info/METADATA,sha256=4QSNbgoSi2b1WT4cliO3BWE3sNlCEx8QMHLoy3FVhYw,3771
13
+ llm_code_validator-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ llm_code_validator-0.1.0.dist-info/entry_points.txt,sha256=oZLKNbby_0kabhK8HSZhYWbkkt8UeVvBGnWkX8NjOL8,67
15
+ llm_code_validator-0.1.0.dist-info/top_level.txt,sha256=YwsP5j-5OQJz85PCQ7oJiV1XBuVNGWLvK6BdkV9zoIg,19
16
+ llm_code_validator-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ llm-code-validator = llm_code_validator.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Felix Mathew
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ llm_code_validator