llm-code-validator 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+
10
# Repository root, assuming this module lives one directory below it
# (e.g. <repo>/llm_code_validator/signatures.py) -- TODO confirm layout.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Copy of the rule database shipped alongside this module in the wheel.
PACKAGE_SIGNATURE_PATH = Path(__file__).with_name("library_signatures.json")
# Source-tree copy of the rule database, used when running from a checkout.
REPO_SIGNATURE_PATH = REPO_ROOT / "data" / "library_signatures.json"
# Prefer the packaged database; fall back to the repository copy.
DEFAULT_SIGNATURE_PATH = PACKAGE_SIGNATURE_PATH if PACKAGE_SIGNATURE_PATH.exists() else REPO_SIGNATURE_PATH
14
+
15
+
16
@dataclass(frozen=True)
class SignatureRule:
    """One API-drift rule materialized from the signature database."""

    library: str  # library/distribution name, e.g. "pandas"
    symbol: str  # display symbol for diagnostics, e.g. "DataFrame.append"
    message: str  # fully formatted human-readable diagnostic
    version_assumption: str  # version constraint the rule applies to, e.g. "pandas>=2.0"
    severity: str  # "error" (symbol removed) or "warning" (behavior changed)
    replacement: str | None  # suggested replacement code/import, if known
    fix_safety: str  # one of "safe_fix" | "suggested_fix" | "no_fix"
    evidence: str  # source URL or note backing the rule; "" when absent
    match_names: tuple[str, ...]  # names this rule matches in analyzed code
    match_exact_only: bool = False  # True: match the full symbol only, no attribute-suffix match
    required_keywords: tuple[str, ...] = ()  # keyword args that must be present for a match
29
+
30
+
31
class SignatureValidationError(ValueError):
    """Error type for problems in the signature rule database."""

    pass
33
+
34
+
35
def _rule_from_entry(library: str, symbol: str, entry: dict[str, Any], current_version: str) -> SignatureRule | None:
    """Build a SignatureRule from one raw database entry.

    Returns None when the entry describes no issue (the symbol still
    exists, was not moved, and has no recorded behavior change).
    """
    # An entry is a problem if the symbol is gone, was moved from an old
    # module, or changed behavior in some release.
    has_issue = not entry.get("exists", True) or "module_old" in entry or "changed_in" in entry
    if not has_issue:
        return None

    # Version at which the drift happened; fall back to the library's
    # current version when the entry does not record one.
    removed_or_changed = entry.get("removed_in") or entry.get("changed_in") or current_version
    version_assumption = f"{library}>={removed_or_changed}" if removed_or_changed else f"{library} current"
    # First non-empty explanation wins; generic text as last resort.
    reason = entry.get("reason") or entry.get("note") or entry.get("common_mistake") or "Known API drift pattern."
    replacement = entry.get("replacement") or entry.get("new_import") or entry.get("correct_usage") or entry.get("new_usage")
    safety = entry.get("fix_safety") or ("suggested_fix" if replacement else "no_fix")
    # Removed symbols are hard errors; behavioral changes are warnings.
    severity = "error" if not entry.get("exists", True) else "warning"
    evidence = entry.get("source_url") or entry.get("source_note") or entry.get("release_note")
    display_symbol = symbol
    # Special-case two pandas DataFrame methods so diagnostics read
    # "DataFrame.append" rather than a bare "append".
    if symbol in {"append", "mad"} and library == "pandas":
        display_symbol = f"DataFrame.{symbol}"

    exact_only = bool(entry.get("match_exact_only", False))
    configured_match_names = entry.get("match_names", [])
    old_import = entry.get("old_import")
    # Exact-only rules start from the explicitly configured names;
    # everything else starts from the symbol and its last attribute.
    if exact_only and old_import and " import " in old_import:
        match_names = set(configured_match_names)
    elif exact_only and configured_match_names:
        match_names = set(configured_match_names)
    else:
        match_names = {symbol, symbol.split(".")[-1]}
    # Derive extra match names from a "from X import Y" style old import.
    # NOTE(review): nesting of the following blocks reconstructed from the
    # internal exact_only guards -- confirm against the original source.
    if old_import and " import " in old_import:
        import_line = old_import.splitlines()[0]
        module, name = import_line.removeprefix("from ").rsplit(" import ", 1)
        if not exact_only:
            match_names.add(name.strip())
            match_names.add(f"{module.strip()}.{name.strip()}")
    # Derive a callable token from the recorded old usage, e.g.
    # "df.append(...)" -> "df.append".
    old_usage = entry.get("old_usage") or entry.get("old_import")
    if old_usage and not (exact_only and old_import and " import " in old_import):
        token = old_usage.split("(", 1)[0].split()[-1]
        match_names.add(token)
    # Configured names always apply, whatever branch ran above.
    for extra_name in configured_match_names:
        match_names.add(extra_name)

    qualified_symbol = display_symbol if display_symbol.startswith(f"{library}.") else f"{library}.{display_symbol}"

    return SignatureRule(
        library=library,
        symbol=display_symbol,
        message=f"{qualified_symbol} is incompatible with {version_assumption}: {reason}",
        version_assumption=version_assumption,
        severity=severity,
        replacement=replacement,
        fix_safety=safety,
        evidence=evidence or "",
        # Sorted tuple keeps rule equality/hashing deterministic.
        match_names=tuple(sorted(name for name in match_names if name)),
        match_exact_only=exact_only,
        required_keywords=tuple(sorted(entry.get("required_keywords", []))),
    )
88
+
89
+
90
@lru_cache(maxsize=1)
def load_signatures(path: str | None = None) -> dict[str, list[SignatureRule]]:
    """Load the signature database and group rules by library name.

    NOTE(review): the cache holds a single entry, so alternating calls
    with different paths re-read the file each time, and callers share
    (and must not mutate) the returned dict.
    """
    signature_path = Path(path) if path else DEFAULT_SIGNATURE_PATH
    raw = json.loads(signature_path.read_text(encoding="utf-8"))

    rules: dict[str, list[SignatureRule]] = {}
    for library, library_data in raw.items():
        version = library_data.get("current_version", "current")
        for symbol, entry in library_data.get("methods", {}).items():
            rule = _rule_from_entry(library, symbol, entry, version)
            if rule is not None:
                rules.setdefault(library, []).append(rule)
    return rules
104
+
105
+
106
def validate_signature_database(path: str | None = None, require_official_evidence: bool = False) -> list[str]:
    """Check the signature database for structural problems.

    Returns a list of human-readable error strings; empty means valid.
    With require_official_evidence=True, every rule must cite a
    source_url or release_note rather than an informal note.
    """
    signature_path = Path(path) if path else DEFAULT_SIGNATURE_PATH
    raw = json.loads(signature_path.read_text(encoding="utf-8"))

    errors: list[str] = []
    seen: set[tuple[str, str]] = set()
    for library, library_data in raw.items():
        if not isinstance(library_data, dict):
            errors.append(f"{library}: library entry must be an object")
            continue
        methods = library_data.get("methods")
        if not isinstance(methods, dict):
            errors.append(f"{library}: missing methods object")
            continue
        current_version = library_data.get("current_version", "current")
        for symbol, entry in methods.items():
            key = (library, symbol)
            if key in seen:
                errors.append(f"{library}.{symbol}: duplicate rule")
            seen.add(key)
            if not isinstance(entry, dict):
                errors.append(f"{library}.{symbol}: rule must be an object")
                continue
            rule = _rule_from_entry(library, symbol, entry, current_version)
            if rule is None:
                # Entry describes no drift issue; nothing to validate.
                continue
            if not rule.evidence:
                errors.append(f"{library}.{symbol}: missing evidence")
            has_official = entry.get("source_url") or entry.get("release_note")
            if require_official_evidence and rule.evidence and not has_official:
                errors.append(f"{library}.{symbol}: production rules require source_url or release_note")
            if rule.fix_safety == "safe_fix" and not rule.replacement:
                errors.append(f"{library}.{symbol}: safe_fix requires replacement")
            if rule.fix_safety not in {"safe_fix", "suggested_fix", "no_fix"}:
                errors.append(f"{library}.{symbol}: invalid fix safety {rule.fix_safety}")
    return errors
143
+
144
+
145
def find_rule(
    library: str,
    symbol: str,
    rules: dict[str, list[SignatureRule]] | None = None,
    keywords: set[str] | None = None,
) -> SignatureRule | None:
    """Return the first rule for *library* that matches *symbol*, or None.

    *symbol* may be dotted (e.g. "df.append"); non-exact rules match
    either the full symbol or its final attribute.  *keywords* are the
    keyword arguments observed at the call site, used to gate rules that
    only apply with particular keywords.
    """
    # Bug fix: the previous `rules or load_signatures()` treated an
    # explicitly passed EMPTY dict as "not given" and silently fell back
    # to loading the full default database.  Only an omitted argument
    # should trigger the fallback.
    if rules is None:
        rules = load_signatures()
    if keywords is None:
        keywords = set()
    attr = symbol.split(".")[-1]
    candidates = {symbol, attr}
    for rule in rules.get(library, []):
        # Skip rules whose required keywords are absent at the call site.
        if rule.required_keywords and not keywords.intersection(rule.required_keywords):
            continue
        rule_names = set(rule.match_names)
        if rule.match_exact_only and symbol in rule_names:
            return rule
        if not rule.match_exact_only and candidates.intersection(rule_names):
            return rule
    return None
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import tomllib
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+
9
# Dependency files probed by discover_requirements(), in priority order.
DEFAULT_REQUIREMENTS_FILENAMES = ("requirements.txt", "pyproject.toml", "poetry.lock", "uv.lock", "Pipfile.lock")
# Matches "name", optional comparison operator, optional version,
# e.g. "pandas>=2.0" -> ("pandas", ">=", "2.0").
_REQ_PATTERN = re.compile(r"^\s*([A-Za-z0-9_.-]+)\s*([<>=!~]=?|===)?\s*([^\s;#]+)?")
# NOTE(review): the two lock-file patterns below appear unused here --
# TOML lock files are parsed with tomllib in parse_toml_lock(); confirm
# before removing.
_LOCK_NAME_PATTERN = re.compile(r'^\s*name\s*=\s*["\']([^"\']+)["\']')
_LOCK_VERSION_PATTERN = re.compile(r'^\s*version\s*=\s*["\']([^"\']+)["\']')
13
+
14
+
15
@dataclass(frozen=True)
class VersionContext:
    """Resolved dependency pins for the project under analysis."""

    requirements_path: str | None  # file the pins came from; None when none found
    dependencies: dict[str, str]  # normalized name -> version spec, e.g. "==2.1.0"
    python_version: str | None = None  # interpreter version hint, if supplied
    used_defaults: bool = False  # True when no dependency file was discovered

    def assumption_for(self, library: str, fallback: str) -> str:
        """Return "<library><pin>" when *library* is pinned, else *fallback*.

        The dependency map is keyed by normalized names (lowercase, with
        "_" replaced by "-", as produced by the parsers in this module),
        so the lookup must normalize the same way.  Bug fix: previously
        only .lower() was applied, so libraries such as
        "typing_extensions" never matched their recorded pins.
        """
        pinned = self.dependencies.get(library.replace("_", "-").lower())
        if pinned:
            return f"{library}{pinned}"
        return fallback
27
+
28
+
29
def parse_requirements(path: str | Path) -> dict[str, str]:
    """Parse a requirements.txt-style file into {normalized name: spec}."""
    dependencies: dict[str, str] = {}
    for raw_line in _read_text_lenient(Path(path)).splitlines():
        stripped = raw_line.strip()
        # Skip blanks, comments, and pip options such as "-r other.txt".
        if not stripped or stripped[0] in "#-":
            continue
        match = _REQ_PATTERN.match(stripped)
        if match is None:
            continue
        name, operator, version = match.groups()
        key = name.replace("_", "-").lower()
        dependencies[key] = (operator or "") + (version or "")
    return dependencies
43
+
44
+
45
+ def _read_text_lenient(path: Path) -> str:
46
+ try:
47
+ return path.read_text(encoding="utf-8")
48
+ except UnicodeDecodeError:
49
+ return path.read_text(encoding="utf-8-sig", errors="ignore")
50
+
51
+
52
+ def _normalize_name(name: str) -> str:
53
+ return name.replace("_", "-").lower()
54
+
55
+
56
def _parse_requirement_string(requirement: str) -> tuple[str, str] | None:
    """Split one requirement string into (normalized name, spec), or None."""
    match = _REQ_PATTERN.match(requirement)
    if match is None:
        return None
    name, operator, version = match.groups()
    spec = (operator or "") + (version or "")
    return _normalize_name(name), spec
62
+
63
+
64
def parse_pyproject(path: str | Path) -> dict[str, str]:
    """Extract dependency pins from pyproject.toml (PEP 621 and Poetry)."""
    data = tomllib.loads(_read_text_lenient(Path(path)))
    dependencies: dict[str, str] = {}
    # PEP 621 [project].dependencies: a list of requirement strings.
    for requirement in data.get("project", {}).get("dependencies", []) or []:
        parsed = _parse_requirement_string(requirement)
        if parsed is not None:
            name, spec = parsed
            dependencies[name] = spec
    # Poetry [tool.poetry.dependencies]: name -> spec string or table.
    for name, value in data.get("tool", {}).get("poetry", {}).get("dependencies", {}).items():
        if name.lower() == "python":
            continue  # the interpreter pin is not a library dependency
        if isinstance(value, str):
            dependencies[_normalize_name(name)] = value
        elif isinstance(value, dict):
            dependencies[_normalize_name(name)] = str(value.get("version", ""))
    return dependencies
81
+
82
+
83
def parse_toml_lock(path: str | Path) -> dict[str, str]:
    """Read pinned versions from a poetry.lock / uv.lock TOML file."""
    data = tomllib.loads(_read_text_lenient(Path(path)))
    packages = data.get("package", [])
    # Some lock formats key packages by name instead of listing them.
    if isinstance(packages, dict):
        packages = list(packages.values())
    dependencies: dict[str, str] = {}
    for package in packages:
        if not isinstance(package, dict):
            continue
        name = package.get("name")
        version = package.get("version")
        if name and version:
            # Lock files record exact versions, so pin with "==".
            dependencies[_normalize_name(str(name))] = f"=={version}"
    return dependencies
97
+
98
+
99
def parse_pipfile_lock(path: str | Path) -> dict[str, str]:
    """Read pinned versions from a Pipfile.lock (JSON) file."""
    import json

    data = json.loads(_read_text_lenient(Path(path)))
    dependencies: dict[str, str] = {}
    # Both runtime ("default") and dev ("develop") sections are collected.
    for section in ("default", "develop"):
        for name, value in data.get(section, {}).items():
            if isinstance(value, str):
                spec = value
            elif isinstance(value, dict):
                spec = str(value.get("version", ""))
            else:
                continue  # unknown entry shape; skip, as before
            dependencies[_normalize_name(name)] = spec
    return dependencies
111
+
112
+
113
def parse_dependency_file(path: str | Path) -> dict[str, str]:
    """Dispatch to the right parser based on the dependency file's name.

    Unrecognized filenames fall back to requirements.txt-style parsing.
    """
    dep_path = Path(path)
    parsers = {
        "requirements.txt": parse_requirements,
        "pyproject.toml": parse_pyproject,
        "poetry.lock": parse_toml_lock,
        "uv.lock": parse_toml_lock,
        "Pipfile.lock": parse_pipfile_lock,
    }
    parser = parsers.get(dep_path.name, parse_requirements)
    return parser(dep_path)
124
+
125
+
126
def discover_requirements(start: str | Path = ".") -> Path | None:
    """Return the first known dependency file next to *start*, or None.

    A file path is treated as its containing directory; candidates are
    checked in DEFAULT_REQUIREMENTS_FILENAMES priority order.
    """
    root = Path(start)
    if root.is_file():
        root = root.parent
    return next(
        (root / name for name in DEFAULT_REQUIREMENTS_FILENAMES if (root / name).exists()),
        None,
    )
135
+
136
+
137
def build_version_context(
    paths: list[str] | None = None,
    requirements: str | None = None,
    python_version: str | None = None,
) -> VersionContext:
    """Build a VersionContext from an explicit file or by discovery.

    Priority: an explicit *requirements* path wins; otherwise the search
    starts next to the first analyzed path (or the current directory).
    When nothing is found, an empty context with used_defaults=True is
    returned.
    """
    if requirements:
        explicit = Path(requirements)
        return VersionContext(str(explicit), parse_dependency_file(explicit), python_version, used_defaults=False)

    search_root = "."
    if paths:
        first = Path(paths[0])
        search_root = str(first.parent if first.is_file() else first)
    found = discover_requirements(search_root)
    if found is None:
        return VersionContext(None, {}, python_version, used_defaults=True)
    return VersionContext(str(found), parse_dependency_file(found), python_version, used_defaults=False)
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-code-validator
3
+ Version: 0.1.0
4
+ Summary: CLI guardrail for catching stale Python APIs before runtime.
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=8; extra == "dev"
10
+ Dynamic: license-file
11
+
12
+ # llm-code-validator
13
+
14
+ Python CLI for detecting stale or version-incompatible third-party API usage in Python source code.
15
+
16
+ ## Installation
17
+
18
+ Install from the repository:
19
+
20
+ ```bash
21
+ git clone https://github.com/mathew-felix/llm-code-validator
22
+ cd llm-code-validator
23
+ pip install -e .
24
+ ```
25
+
26
+ Install with test dependencies:
27
+
28
+ ```bash
29
+ pip install -e ".[dev]"
30
+ ```
31
+
32
+ After the package is published to PyPI:
33
+
34
+ ```bash
35
+ pip install llm-code-validator
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ Check one file:
41
+
42
+ ```bash
43
+ llm-code-validator check file.py
44
+ ```
45
+
46
+ Check a directory:
47
+
48
+ ```bash
49
+ llm-code-validator check src/
50
+ ```
51
+
52
+ Check standard input:
53
+
54
+ ```bash
55
+ llm-code-validator check - < snippet.py
56
+ ```
57
+
58
+ Check staged Git files:
59
+
60
+ ```bash
61
+ llm-code-validator check --staged
62
+ ```
63
+
64
+ Use an explicit dependency file:
65
+
66
+ ```bash
67
+ llm-code-validator check --requirements requirements.txt src/
68
+ ```
69
+
70
+ Show low-confidence diagnostics:
71
+
72
+ ```bash
73
+ llm-code-validator check --show-low-confidence src/
74
+ ```
75
+
76
+ Exit codes:
77
+
78
+ - `0`: no diagnostics
79
+ - `1`: diagnostics found
80
+ - `2`: tool error
81
+
82
+ ## Output
83
+
84
+ Text output:
85
+
86
+ ```bash
87
+ llm-code-validator check src/
88
+ ```
89
+
90
+ JSON output:
91
+
92
+ ```bash
93
+ llm-code-validator check src/ --format json
94
+ ```
95
+
96
+ GitHub Actions annotation output:
97
+
98
+ ```bash
99
+ llm-code-validator check src/ --format github
100
+ ```
101
+
102
+ ## Fixes
103
+
104
+ Preview fixes:
105
+
106
+ ```bash
107
+ llm-code-validator fix file.py
108
+ ```
109
+
110
+ Apply safe fixes:
111
+
112
+ ```bash
113
+ llm-code-validator fix file.py --write
114
+ ```
115
+
116
+ Only rules marked `safe_fix` are written. Rules marked `suggested_fix` or `no_fix` are reported but not changed.
117
+
118
+ Current rule safety counts:
119
+
120
+ - `safe_fix`: 15 rules
121
+ - `suggested_fix`: 51 rules
122
+ - `no_fix`: 2 rules
123
+
124
+ ## Signature Database
125
+
126
+ Validate the rule database:
127
+
128
+ ```bash
129
+ llm-code-validator validate-signatures
130
+ ```
131
+
132
+ The source rule database is:
133
+
134
+ ```text
135
+ data/library_signatures.json
136
+ ```
137
+
138
+ The packaged rule database is:
139
+
140
+ ```text
141
+ llm_code_validator/library_signatures.json
142
+ ```
143
+
144
+ Current rule count:
145
+
146
+ - 68 API-drift rules
147
+
148
+ ## Benchmarks
149
+
150
+ Run the CLI benchmark dataset:
151
+
152
+ ```bash
153
+ python -m llm_code_validator.benchmark --dataset validation_dataset/cli_benchmark_cases.json
154
+ ```
155
+
156
+ Run the AI-stack benchmark dataset:
157
+
158
+ ```bash
159
+ python -m llm_code_validator.benchmark --dataset validation_dataset/ai_stack_benchmark_cases.json
160
+ ```
161
+
162
+ Current saved benchmark results:
163
+
164
+ - CLI dataset: precision `1.0`, recall `1.0`, p50 `0.243ms`, p95 `6.199ms`
165
+ - AI-stack dataset: precision `1.0`, recall `1.0`, p50 `0.444ms`, p95 `4.939ms`
166
+
167
+ ## Pre-Commit
168
+
169
+ `.pre-commit-hooks.yaml` is included:
170
+
171
+ ```yaml
172
+ repos:
173
+ - repo: https://github.com/mathew-felix/llm-code-validator
174
+ rev: v0.1.0
175
+ hooks:
176
+ - id: llm-code-validator
177
+ ```
178
+
179
+ ## GitHub Actions
180
+
181
+ Example workflow:
182
+
183
+ ```yaml
184
+ name: API Drift Check
185
+
186
+ on:
187
+ pull_request:
188
+
189
+ jobs:
190
+ api-drift:
191
+ runs-on: ubuntu-latest
192
+ steps:
193
+ - uses: actions/checkout@v4
194
+ - uses: actions/setup-python@v5
195
+ with:
196
+ python-version: "3.11"
197
+ - run: pip install llm-code-validator
198
+ - run: llm-code-validator check . --format github
199
+ ```
200
+
201
+ ## Testing
202
+
203
+ Run the test suite:
204
+
205
+ ```bash
206
+ pytest -q
207
+ ```
208
+
209
+ Current local result:
210
+
211
+ ```text
212
+ 72 passed
213
+ ```
214
+
215
+ ## Documentation
216
+
217
+ - `docs/demo.md`: example check and fix workflow
218
+ - `docs/rules.md`: rule database notes
219
+ - `docs/release.md`: package release steps
220
+ - `PROJECT_REPORT.md`: project report
@@ -0,0 +1,16 @@
1
+ llm_code_validator/__init__.py,sha256=b1P-a04qNBDE4gl9pG8j5x3ko8ZnOVbVgxeF1DN2F5I,85
2
+ llm_code_validator/benchmark.py,sha256=Ga22OpwehVKMUZ1tF39WKEx-knjlfKwxUPHgxSuSY3o,5554
3
+ llm_code_validator/cli.py,sha256=y6Gyo-ttcjxFfIddp-nYp-RPC3iFKW2I0I-gspJMv1k,4599
4
+ llm_code_validator/core.py,sha256=BEHRc4FKPMfPsLKBhZeqBxG-YoH2fSS64-aIoHGeTQk,12298
5
+ llm_code_validator/diagnostics.py,sha256=K1PJ8qBevv5wWqNgYcr-vj2iuCjPRsEH4YTJewnNG8M,1667
6
+ llm_code_validator/fixes.py,sha256=hpro-qODrBmsGh9wH79vg6MbTXasA7F0OjASiKnDZIs,2348
7
+ llm_code_validator/formatting.py,sha256=8UYMH6AOlROVoyRMx8fkcyJuyMjXNPKjZFl-DJuEWf4,1519
8
+ llm_code_validator/library_signatures.json,sha256=QkSt15EvhHjrK358O1IeVtbVgIo09V0sj9Qe97IYVK8,45559
9
+ llm_code_validator/signatures.py,sha256=Bf47pk5Pesn_6caKLLRJDdLCwcVzWzZ6Z_jyyuHsZL4,6951
10
+ llm_code_validator/versioning.py,sha256=xQBgZca3ITCJDTQxwKLu60NaVYHE4o2fkM5fspy8BdQ,5443
11
+ llm_code_validator-0.1.0.dist-info/licenses/LICENSE,sha256=8pmblFexDdMWS6-UCM8BJ6XrLtZfaT23oLzHzg9MVsk,1090
12
+ llm_code_validator-0.1.0.dist-info/METADATA,sha256=4QSNbgoSi2b1WT4cliO3BWE3sNlCEx8QMHLoy3FVhYw,3771
13
+ llm_code_validator-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ llm_code_validator-0.1.0.dist-info/entry_points.txt,sha256=oZLKNbby_0kabhK8HSZhYWbkkt8UeVvBGnWkX8NjOL8,67
15
+ llm_code_validator-0.1.0.dist-info/top_level.txt,sha256=YwsP5j-5OQJz85PCQ7oJiV1XBuVNGWLvK6BdkV9zoIg,19
16
+ llm_code_validator-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ llm-code-validator = llm_code_validator.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Felix Mathew
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ llm_code_validator