llm-code-validator 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. llm_code_validator-0.1.1/PKG-INFO +187 -0
  2. llm_code_validator-0.1.1/README.md +160 -0
  3. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/__init__.py +1 -1
  4. llm_code_validator-0.1.1/llm_code_validator/ai_review.py +153 -0
  5. llm_code_validator-0.1.1/llm_code_validator/cli.py +221 -0
  6. llm_code_validator-0.1.1/llm_code_validator/config.py +55 -0
  7. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/core.py +8 -4
  8. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/library_signatures.json +107 -103
  9. llm_code_validator-0.1.1/llm_code_validator/rule_candidates.py +40 -0
  10. llm_code_validator-0.1.1/llm_code_validator.egg-info/PKG-INFO +187 -0
  11. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/SOURCES.txt +4 -0
  12. llm_code_validator-0.1.1/pyproject.toml +46 -0
  13. llm_code_validator-0.1.1/tests/test_ai_review.py +36 -0
  14. llm_code_validator-0.1.1/tests/test_cli.py +287 -0
  15. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_workflows.py +3 -1
  16. llm_code_validator-0.1.0/PKG-INFO +0 -220
  17. llm_code_validator-0.1.0/README.md +0 -209
  18. llm_code_validator-0.1.0/llm_code_validator/cli.py +0 -105
  19. llm_code_validator-0.1.0/llm_code_validator.egg-info/PKG-INFO +0 -220
  20. llm_code_validator-0.1.0/pyproject.toml +0 -26
  21. llm_code_validator-0.1.0/tests/test_cli.py +0 -142
  22. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/LICENSE +0 -0
  23. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/benchmark.py +0 -0
  24. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/diagnostics.py +0 -0
  25. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/fixes.py +0 -0
  26. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/formatting.py +0 -0
  27. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/signatures.py +0 -0
  28. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/versioning.py +0 -0
  29. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/dependency_links.txt +0 -0
  30. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/entry_points.txt +0 -0
  31. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/requires.txt +0 -0
  32. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/top_level.txt +0 -0
  33. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/setup.cfg +0 -0
  34. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_benchmark.py +0 -0
  35. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_core.py +0 -0
  36. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_external_repo_evaluation.py +0 -0
  37. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_fixes.py +0 -0
  38. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_formatting.py +0 -0
  39. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_signatures.py +0 -0
  40. {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_versioning.py +0 -0
@@ -0,0 +1,187 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-code-validator
3
+ Version: 0.1.1
4
+ Summary: CLI guardrail for catching stale Python APIs before runtime.
5
+ Author: Felix Mathew
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/mathew-felix/llm-code-validator
8
+ Project-URL: Repository, https://github.com/mathew-felix/llm-code-validator
9
+ Project-URL: Issues, https://github.com/mathew-felix/llm-code-validator/issues
10
+ Project-URL: PyPI, https://pypi.org/project/llm-code-validator/
11
+ Keywords: api-drift,static-analysis,llm,python,ci
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Classifier: Topic :: Software Development :: Testing
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=8; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # llm-code-validator
29
+
30
+ Python CLI for checking dependency-heavy Python projects for stale or version-incompatible third-party API usage before commit or CI.
31
+
32
+ It parses Python files with `ast`, checks imports and calls against a maintained API-drift rule database, and reports issues before runtime.
33
+
34
+ Default checks are local-only. No OpenAI, Anthropic, or other LLM API key is required, and the tool does not make network calls in normal use.
35
+
36
+ Current local validation: 74 tests passing, 68 API-drift rules, and PyPI install verified.
37
+
38
+ PyPI: https://pypi.org/project/llm-code-validator/
39
+
40
+ ![Terminal demo showing API drift diagnostics and safe fix preview](docs/demo.gif)
41
+
42
+ ## Install
43
+
44
+ ```bash
45
+ pip install llm-code-validator
46
+ ```
47
+
48
+ For local development:
49
+
50
+ ```bash
51
+ git clone https://github.com/mathew-felix/llm-code-validator
52
+ cd llm-code-validator
53
+ pip install -e ".[dev]"
54
+ ```
55
+
56
+ ## Quick Use
57
+
58
+ ```bash
59
+ llm-code-validator check file.py
60
+ llm-code-validator check src/
61
+ llm-code-validator check --staged
62
+ llm-code-validator check src/ --format json
63
+ llm-code-validator check src/ --format github
64
+ ```
65
+
66
+ Exit codes:
67
+
68
+ - `0`: no diagnostics
69
+ - `1`: diagnostics found
70
+ - `2`: tool error
71
+
72
+ ## Example
73
+
74
+ ```python
75
+ from sqlalchemy.ext.declarative import declarative_base
76
+
77
+ Base = declarative_base()
78
+ ```
79
+
80
+ ```bash
81
+ llm-code-validator check app.py
82
+ ```
83
+
84
+ ```text
85
+ app.py:1 LCV001 warning sqlalchemy.declarative_base sqlalchemy.declarative_base is incompatible with sqlalchemy>=2.0.0
86
+ fix: from sqlalchemy.orm import declarative_base
87
+ ```
88
+
89
+ Preview or apply safe fixes:
90
+
91
+ ```bash
92
+ llm-code-validator fix app.py
93
+ llm-code-validator fix app.py --write
94
+ ```
95
+
96
+ ## What It Checks
97
+
98
+ Current rule database:
99
+
100
+ - 68 API-drift rules
101
+ - 15 safe fixes
102
+ - Rules for OpenAI, Anthropic, LangChain, LangGraph, LlamaIndex, Pinecone, ChromaDB, FastAPI, Pydantic, pandas, NumPy, SQLAlchemy, Torch, and Transformers
103
+
104
+ Validate the rule database:
105
+
106
+ ```bash
107
+ llm-code-validator validate-signatures
108
+ ```
109
+
110
+ This checks source-level API migration patterns. It does not replace Ruff for linting, mypy for type checking, pip-audit for vulnerability checks, or Dependabot for dependency updates.
111
+
112
+ ## Security Model
113
+
114
+ By default, `llm-code-validator` reads local Python files, parses them with Python's built-in `ast` module, and compares imports and calls with the bundled rule database. It does not send source code, dependency files, environment variables, or secrets to any external service.
115
+
116
+ If optional AI-assisted review is added in the future, it should remain explicit opt-in and should minimize and redact any code snippets before a provider request.
117
+
118
+ ## Rule Maintenance
119
+
120
+ Public rules are reviewed before release. New rules should be added to `data/library_signatures.json`, backed by official evidence such as migration guides, release notes, official docs, or maintainer discussions, and covered by a test or benchmark case.
121
+
122
+ The packaged PyPI wheel includes `llm_code_validator/library_signatures.json`, so users receive reviewed rule updates by upgrading the package:
123
+
124
+ ```bash
125
+ pip install --upgrade llm-code-validator
126
+ ```
127
+
128
+ Use `docs/rules.md` for the contribution workflow and `docs/release.md` for release verification.
129
+
130
+ ## Limitations
131
+
132
+ - Detects known API-drift rules only.
133
+ - Does not detect every possible Python, dependency, security, or runtime issue.
134
+ - Does not prove full program correctness.
135
+ - Complex dynamic imports may be missed.
136
+ - Dependency checks depend on available project metadata.
137
+ - Suggested fixes require review before applying.
138
+ - External repository findings are treated as candidates until manually reviewed.
139
+
140
+ ## Integrations
141
+
142
+ Pre-commit:
143
+
144
+ ```yaml
145
+ repos:
146
+ - repo: https://github.com/mathew-felix/llm-code-validator
147
+ rev: v0.1.1
148
+ hooks:
149
+ - id: llm-code-validator
150
+ ```
151
+
152
+ GitHub Actions:
153
+
154
+ ```yaml
155
+ - run: pip install llm-code-validator
156
+ - run: llm-code-validator check . --format github
157
+ ```
158
+
159
+ ## Development
160
+
161
+ Run tests:
162
+
163
+ ```bash
164
+ pytest -q
165
+ ```
166
+
167
+ Current local result:
168
+
169
+ ```text
170
+ 74 passed
171
+ ```
172
+
173
+ Run benchmarks:
174
+
175
+ ```bash
176
+ python -m llm_code_validator.benchmark --dataset validation_dataset/cli_benchmark_cases.json
177
+ python -m llm_code_validator.benchmark --dataset validation_dataset/ai_stack_benchmark_cases.json
178
+ ```
179
+
180
+ ## More Details
181
+
182
+ - `docs/demo.md`: command walkthrough
183
+ - `docs/accuracy.md`: benchmark and external-review notes
184
+ - `docs/rules.md`: rule database notes
185
+ - `docs/security.md`: local-only, AI-review, and policy controls
186
+ - `docs/ai-review.md`: optional AI-review roadmap and candidate-rule workflow
187
+ - `docs/release.md`: release steps
@@ -0,0 +1,160 @@
1
+ # llm-code-validator
2
+
3
+ Python CLI for checking dependency-heavy Python projects for stale or version-incompatible third-party API usage before commit or CI.
4
+
5
+ It parses Python files with `ast`, checks imports and calls against a maintained API-drift rule database, and reports issues before runtime.
6
+
7
+ Default checks are local-only. No OpenAI, Anthropic, or other LLM API key is required, and the tool does not make network calls in normal use.
8
+
9
+ Current local validation: 74 tests passing, 68 API-drift rules, and PyPI install verified.
10
+
11
+ PyPI: https://pypi.org/project/llm-code-validator/
12
+
13
+ ![Terminal demo showing API drift diagnostics and safe fix preview](docs/demo.gif)
14
+
15
+ ## Install
16
+
17
+ ```bash
18
+ pip install llm-code-validator
19
+ ```
20
+
21
+ For local development:
22
+
23
+ ```bash
24
+ git clone https://github.com/mathew-felix/llm-code-validator
25
+ cd llm-code-validator
26
+ pip install -e ".[dev]"
27
+ ```
28
+
29
+ ## Quick Use
30
+
31
+ ```bash
32
+ llm-code-validator check file.py
33
+ llm-code-validator check src/
34
+ llm-code-validator check --staged
35
+ llm-code-validator check src/ --format json
36
+ llm-code-validator check src/ --format github
37
+ ```
38
+
39
+ Exit codes:
40
+
41
+ - `0`: no diagnostics
42
+ - `1`: diagnostics found
43
+ - `2`: tool error
44
+
45
+ ## Example
46
+
47
+ ```python
48
+ from sqlalchemy.ext.declarative import declarative_base
49
+
50
+ Base = declarative_base()
51
+ ```
52
+
53
+ ```bash
54
+ llm-code-validator check app.py
55
+ ```
56
+
57
+ ```text
58
+ app.py:1 LCV001 warning sqlalchemy.declarative_base sqlalchemy.declarative_base is incompatible with sqlalchemy>=2.0.0
59
+ fix: from sqlalchemy.orm import declarative_base
60
+ ```
61
+
62
+ Preview or apply safe fixes:
63
+
64
+ ```bash
65
+ llm-code-validator fix app.py
66
+ llm-code-validator fix app.py --write
67
+ ```
68
+
69
+ ## What It Checks
70
+
71
+ Current rule database:
72
+
73
+ - 68 API-drift rules
74
+ - 15 safe fixes
75
+ - Rules for OpenAI, Anthropic, LangChain, LangGraph, LlamaIndex, Pinecone, ChromaDB, FastAPI, Pydantic, pandas, NumPy, SQLAlchemy, Torch, and Transformers
76
+
77
+ Validate the rule database:
78
+
79
+ ```bash
80
+ llm-code-validator validate-signatures
81
+ ```
82
+
83
+ This checks source-level API migration patterns. It does not replace Ruff for linting, mypy for type checking, pip-audit for vulnerability checks, or Dependabot for dependency updates.
84
+
85
+ ## Security Model
86
+
87
+ By default, `llm-code-validator` reads local Python files, parses them with Python's built-in `ast` module, and compares imports and calls with the bundled rule database. It does not send source code, dependency files, environment variables, or secrets to any external service.
88
+
89
+ If optional AI-assisted review is added in the future, it should remain explicit opt-in and should minimize and redact any code snippets before a provider request.
90
+
91
+ ## Rule Maintenance
92
+
93
+ Public rules are reviewed before release. New rules should be added to `data/library_signatures.json`, backed by official evidence such as migration guides, release notes, official docs, or maintainer discussions, and covered by a test or benchmark case.
94
+
95
+ The packaged PyPI wheel includes `llm_code_validator/library_signatures.json`, so users receive reviewed rule updates by upgrading the package:
96
+
97
+ ```bash
98
+ pip install --upgrade llm-code-validator
99
+ ```
100
+
101
+ Use `docs/rules.md` for the contribution workflow and `docs/release.md` for release verification.
102
+
103
+ ## Limitations
104
+
105
+ - Detects known API-drift rules only.
106
+ - Does not detect every possible Python, dependency, security, or runtime issue.
107
+ - Does not prove full program correctness.
108
+ - Complex dynamic imports may be missed.
109
+ - Dependency checks depend on available project metadata.
110
+ - Suggested fixes require review before applying.
111
+ - External repository findings are treated as candidates until manually reviewed.
112
+
113
+ ## Integrations
114
+
115
+ Pre-commit:
116
+
117
+ ```yaml
118
+ repos:
119
+ - repo: https://github.com/mathew-felix/llm-code-validator
120
+ rev: v0.1.1
121
+ hooks:
122
+ - id: llm-code-validator
123
+ ```
124
+
125
+ GitHub Actions:
126
+
127
+ ```yaml
128
+ - run: pip install llm-code-validator
129
+ - run: llm-code-validator check . --format github
130
+ ```
131
+
132
+ ## Development
133
+
134
+ Run tests:
135
+
136
+ ```bash
137
+ pytest -q
138
+ ```
139
+
140
+ Current local result:
141
+
142
+ ```text
143
+ 74 passed
144
+ ```
145
+
146
+ Run benchmarks:
147
+
148
+ ```bash
149
+ python -m llm_code_validator.benchmark --dataset validation_dataset/cli_benchmark_cases.json
150
+ python -m llm_code_validator.benchmark --dataset validation_dataset/ai_stack_benchmark_cases.json
151
+ ```
152
+
153
+ ## More Details
154
+
155
+ - `docs/demo.md`: command walkthrough
156
+ - `docs/accuracy.md`: benchmark and external-review notes
157
+ - `docs/rules.md`: rule database notes
158
+ - `docs/security.md`: local-only, AI-review, and policy controls
159
+ - `docs/ai-review.md`: optional AI-review roadmap and candidate-rule workflow
160
+ - `docs/release.md`: release steps
@@ -1,3 +1,3 @@
1
1
  """Deterministic API-drift checker for Python source code."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.1"
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ from datetime import datetime, timezone
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ from .core import EXCLUDED_DIR_NAMES, iter_python_files
11
+
12
+
13
# Exact (lower-cased) file names that mark a path as secret material:
# dotenv variants first, then private-key file names.
SECRET_FILE_NAMES = {".env", ".env.local", ".env.production"} | {
    "id_rsa",
    "id_dsa",
    "id_ecdsa",
    "id_ed25519",
}

# Lower-cased path components that mark a path as secret material.
SECRET_NAME_PARTS = {
    "secret",
    "secrets",
    "credential",
    "credentials",
    "token",
    "private-key",
}

# Patterns whose matches are rewritten by the redaction callback, in order:
#   1. "sk-" prefixed key strings of at least 12 further characters,
#   2. quoted assignments to api_key / secret / token / password names,
#   3. "Authorization: Bearer <value>" headers (prefix captured as group 1).
SECRET_PATTERNS = [
    re.compile(expression)
    for expression in (
        r"sk-[A-Za-z0-9_-]{12,}",
        r"(?i)(api[_-]?key|secret|token|password)\s*=\s*['\"][^'\"]+['\"]",
        r"(?i)(authorization:\s*bearer\s+)[A-Za-z0-9._~+/=-]+",
    )
]
29
+
30
+
31
@dataclass(frozen=True)
class ProviderConfig:
    """Immutable description of the AI review provider selected by the caller."""

    # Provider identifier; the value "local" is treated specially below.
    provider: str
    # Name of the environment variable expected to hold the API key, if any.
    api_key_env: str | None
    # Endpoint for the provider; this is what makes a "local" provider usable.
    endpoint: str | None = None

    @property
    def configured(self) -> bool:
        """Report whether this provider has what it needs to be called.

        A ``"local"`` provider is configured when ``endpoint`` is non-empty.
        Any other provider is configured when ``api_key_env`` is set and the
        environment variable it names has a non-empty value.
        """
        if self.provider != "local":
            env_name = self.api_key_env
            if not env_name:
                return False
            return bool(os.getenv(env_name))
        return bool(self.endpoint)
42
+
43
+
44
def default_key_env(provider: str) -> str | None:
    """Return the default API-key environment variable name for ``provider``.

    Known providers are ``"openai"``, ``"anthropic"`` and ``"azure-openai"``;
    any other value yields ``None``.
    """
    known_providers = (
        ("openai", "OPENAI_API_KEY"),
        ("anthropic", "ANTHROPIC_API_KEY"),
        ("azure-openai", "AZURE_OPENAI_API_KEY"),
    )
    for candidate, env_name in known_providers:
        if provider == candidate:
            return env_name
    return None
52
+
53
+
54
def redact_secrets(text: str) -> str:
    """Return ``text`` with every ``SECRET_PATTERNS`` match rewritten.

    Patterns are applied one after another, each to the output of the
    previous one, with ``_redaction`` producing the replacement text.
    """
    for secret_pattern in SECRET_PATTERNS:
        text = secret_pattern.sub(_redaction, text)
    return text
59
+
60
+
61
+ def _redaction(match: re.Match[str]) -> str:
62
+ value = match.group(0)
63
+ if value.lower().startswith("authorization:"):
64
+ return f"{match.group(1)}[REDACTED]"
65
+ if "=" in value:
66
+ name = value.split("=", 1)[0].strip()
67
+ return f"{name} = \"[REDACTED]\""
68
+ return "[REDACTED]"
69
+
70
+
71
def is_secret_path(path: Path) -> bool:
    """Decide whether ``path`` must be kept out of an AI review payload.

    A path is rejected when any lower-cased component appears in
    ``EXCLUDED_DIR_NAMES`` or ``SECRET_NAME_PARTS``, or when its lower-cased
    file name appears in ``SECRET_FILE_NAMES``.
    """
    components = {segment.lower() for segment in path.parts}
    if not components.isdisjoint(EXCLUDED_DIR_NAMES):
        return True
    if path.name.lower() in SECRET_FILE_NAMES:
        return True
    return not components.isdisjoint(SECRET_NAME_PARTS)
78
+
79
+
80
+ def _extract_relevant_lines(source: str, max_snippet_lines: int) -> list[str]:
81
+ relevant: list[str] = []
82
+ for line in source.splitlines():
83
+ stripped = line.strip()
84
+ if (
85
+ stripped.startswith("import ")
86
+ or stripped.startswith("from ")
87
+ or any(name in stripped.lower() for name in ("key", "secret", "token", "password", "authorization"))
88
+ or "(" in stripped
89
+ or "." in stripped
90
+ or "@" in stripped
91
+ ):
92
+ relevant.append(line)
93
+ if len(relevant) >= max_snippet_lines:
94
+ break
95
+ return relevant
96
+
97
+
98
def build_ai_payload(
    paths: list[str],
    *,
    max_snippet_lines: int = 30,
    redact: bool = True,
) -> dict[str, object]:
    """Assemble the advisory-review payload for the Python files under ``paths``.

    Files flagged by ``is_secret_path`` are skipped. Each remaining file
    contributes its path and a snippet made of the lines chosen by
    ``_extract_relevant_lines``, passed through ``redact_secrets`` unless
    ``redact`` is false.

    Args:
        paths: Locations handed to ``iter_python_files``.
        max_snippet_lines: Per-file limit forwarded to ``_extract_relevant_lines``.
        redact: Whether snippets are passed through ``redact_secrets``.

    Returns:
        A dict with the keys ``purpose``, ``files`` and ``instructions``.
    """
    entries = []
    for candidate in iter_python_files(paths):
        if is_secret_path(candidate):
            continue
        try:
            text = candidate.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: read again, substituting undecodable bytes.
            text = candidate.read_text(encoding="utf-8", errors="replace")
        selected = _extract_relevant_lines(text, max_snippet_lines)
        excerpt = "\n".join(selected)
        entries.append(
            {
                "path": str(candidate),
                "snippet": redact_secrets(excerpt) if redact else excerpt,
            }
        )

    guidance = (
        "Review only the provided minimized snippets for stale third-party API usage. "
        "Return advisory findings and candidate rules; do not assume full program context."
    )
    payload: dict[str, object] = {
        "purpose": "advisory API-drift review",
        "files": entries,
        "instructions": guidance,
    }
    return payload
124
+
125
+
126
def render_ai_payload(payload: dict[str, object]) -> str:
    """Serialize ``payload`` as JSON with two-space indentation and sorted keys."""
    rendered = json.dumps(payload, sort_keys=True, indent=2)
    return rendered
128
+
129
+
130
def write_ai_audit_log(path: str | Path, provider: str, payload: dict[str, object]) -> None:
    """Append one JSON line describing an AI review payload to the log at ``path``.

    The record holds a UTC timestamp, the provider, the number of entries in
    ``payload["files"]`` (0 when that value is not a list), the payload's
    ``purpose`` and a ``contains_source_snippets`` flag that is always false;
    the snippets themselves are not written. Missing parent directories of
    ``path`` are created.
    """
    listed = payload.get("files", [])
    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "provider": provider,
        "file_count": len(listed) if isinstance(listed, list) else 0,
        "payload_type": payload.get("purpose", "advisory API-drift review"),
        "contains_source_snippets": False,
    }
    serialized = json.dumps(entry, sort_keys=True)

    log_file = Path(path)
    log_file.parent.mkdir(parents=True, exist_ok=True)
    with log_file.open("a", encoding="utf-8") as stream:
        stream.write(f"{serialized}\n")
144
+
145
+
146
def validate_ai_provider(config: ProviderConfig, no_network: bool) -> None:
    """Raise ``RuntimeError`` when an AI review provider call is not permitted.

    Args:
        config: Provider settings; ``configured``, ``provider`` and
            ``api_key_env`` are consulted.
        no_network: When true, provider calls are rejected outright.

    Raises:
        RuntimeError: If ``no_network`` is set, if a ``"local"`` provider is
            not configured, or if another provider is not configured.
    """
    if no_network:
        raise RuntimeError("--no-network prevents AI review provider calls")
    if config.configured:
        return
    if config.provider == "local":
        raise RuntimeError("--ai-provider local requires --ai-endpoint")
    missing = config.api_key_env or "provider API key environment variable"
    raise RuntimeError(f"--ai-review requires {missing} to be set")