llm-code-validator 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_code_validator-0.1.1/PKG-INFO +187 -0
- llm_code_validator-0.1.1/README.md +160 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/__init__.py +1 -1
- llm_code_validator-0.1.1/llm_code_validator/ai_review.py +153 -0
- llm_code_validator-0.1.1/llm_code_validator/cli.py +221 -0
- llm_code_validator-0.1.1/llm_code_validator/config.py +55 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/core.py +8 -4
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/library_signatures.json +107 -103
- llm_code_validator-0.1.1/llm_code_validator/rule_candidates.py +40 -0
- llm_code_validator-0.1.1/llm_code_validator.egg-info/PKG-INFO +187 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/SOURCES.txt +4 -0
- llm_code_validator-0.1.1/pyproject.toml +46 -0
- llm_code_validator-0.1.1/tests/test_ai_review.py +36 -0
- llm_code_validator-0.1.1/tests/test_cli.py +287 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_workflows.py +3 -1
- llm_code_validator-0.1.0/PKG-INFO +0 -220
- llm_code_validator-0.1.0/README.md +0 -209
- llm_code_validator-0.1.0/llm_code_validator/cli.py +0 -105
- llm_code_validator-0.1.0/llm_code_validator.egg-info/PKG-INFO +0 -220
- llm_code_validator-0.1.0/pyproject.toml +0 -26
- llm_code_validator-0.1.0/tests/test_cli.py +0 -142
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/LICENSE +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/benchmark.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/diagnostics.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/fixes.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/formatting.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/signatures.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator/versioning.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/dependency_links.txt +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/entry_points.txt +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/requires.txt +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/llm_code_validator.egg-info/top_level.txt +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/setup.cfg +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_benchmark.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_core.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_external_repo_evaluation.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_fixes.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_formatting.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_signatures.py +0 -0
- {llm_code_validator-0.1.0 → llm_code_validator-0.1.1}/tests/test_versioning.py +0 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-code-validator
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: CLI guardrail for catching stale Python APIs before runtime.
|
|
5
|
+
Author: Felix Mathew
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/mathew-felix/llm-code-validator
|
|
8
|
+
Project-URL: Repository, https://github.com/mathew-felix/llm-code-validator
|
|
9
|
+
Project-URL: Issues, https://github.com/mathew-felix/llm-code-validator/issues
|
|
10
|
+
Project-URL: PyPI, https://pypi.org/project/llm-code-validator/
|
|
11
|
+
Keywords: api-drift,static-analysis,llm,python,ci
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
20
|
+
Classifier: Topic :: Software Development :: Testing
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# llm-code-validator
|
|
29
|
+
|
|
30
|
+
Python CLI for checking dependency-heavy Python projects for stale or version-incompatible third-party API usage before commit or CI.
|
|
31
|
+
|
|
32
|
+
It parses Python files with `ast`, checks imports and calls against a maintained API-drift rule database, and reports issues before runtime.
|
|
33
|
+
|
|
34
|
+
Default checks are local-only. No OpenAI, Anthropic, or other LLM API key is required, and the tool does not make network calls in normal use.
|
|
35
|
+
|
|
36
|
+
Current local validation: 74 tests passing, 68 API-drift rules, and PyPI install verified.
|
|
37
|
+
|
|
38
|
+
PyPI: https://pypi.org/project/llm-code-validator/
|
|
39
|
+
|
|
40
|
+

|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install llm-code-validator
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
For local development:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
git clone https://github.com/mathew-felix/llm-code-validator
|
|
52
|
+
cd llm-code-validator
|
|
53
|
+
pip install -e ".[dev]"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Quick Use
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
llm-code-validator check file.py
|
|
60
|
+
llm-code-validator check src/
|
|
61
|
+
llm-code-validator check --staged
|
|
62
|
+
llm-code-validator check src/ --format json
|
|
63
|
+
llm-code-validator check src/ --format github
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Exit codes:
|
|
67
|
+
|
|
68
|
+
- `0`: no diagnostics
|
|
69
|
+
- `1`: diagnostics found
|
|
70
|
+
- `2`: tool error
|
|
71
|
+
|
|
72
|
+
## Example
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
76
|
+
|
|
77
|
+
Base = declarative_base()
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
llm-code-validator check app.py
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
```text
|
|
85
|
+
app.py:1 LCV001 warning sqlalchemy.declarative_base sqlalchemy.declarative_base is incompatible with sqlalchemy>=2.0.0
|
|
86
|
+
fix: from sqlalchemy.orm import declarative_base
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Preview or apply safe fixes:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
llm-code-validator fix app.py
|
|
93
|
+
llm-code-validator fix app.py --write
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## What It Checks
|
|
97
|
+
|
|
98
|
+
Current rule database:
|
|
99
|
+
|
|
100
|
+
- 68 API-drift rules
|
|
101
|
+
- 15 safe fixes
|
|
102
|
+
- Rules for OpenAI, Anthropic, LangChain, LangGraph, LlamaIndex, Pinecone, ChromaDB, FastAPI, Pydantic, pandas, NumPy, SQLAlchemy, Torch, and Transformers
|
|
103
|
+
|
|
104
|
+
Validate the rule database:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
llm-code-validator validate-signatures
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
This checks source-level API migration patterns. It does not replace Ruff for linting, mypy for type checking, pip-audit for vulnerability checks, or Dependabot for dependency updates.
|
|
111
|
+
|
|
112
|
+
## Security Model
|
|
113
|
+
|
|
114
|
+
By default, `llm-code-validator` reads local Python files, parses them with Python's built-in `ast` module, and compares imports and calls with the bundled rule database. It does not send source code, dependency files, environment variables, or secrets to any external service.
|
|
115
|
+
|
|
116
|
+
If optional AI-assisted review is added in the future, it should remain explicit opt-in and should minimize and redact any code snippets before a provider request.
|
|
117
|
+
|
|
118
|
+
## Rule Maintenance
|
|
119
|
+
|
|
120
|
+
Public rules are reviewed before release. New rules should be added to `data/library_signatures.json`, backed by official evidence such as migration guides, release notes, official docs, or maintainer discussions, and covered by a test or benchmark case.
|
|
121
|
+
|
|
122
|
+
The packaged PyPI wheel includes `llm_code_validator/library_signatures.json`, so users receive reviewed rule updates by upgrading the package:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pip install --upgrade llm-code-validator
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Use `docs/rules.md` for the contribution workflow and `docs/release.md` for release verification.
|
|
129
|
+
|
|
130
|
+
## Limitations
|
|
131
|
+
|
|
132
|
+
- Detects known API-drift rules only.
|
|
133
|
+
- Does not detect every possible Python, dependency, security, or runtime issue.
|
|
134
|
+
- Does not prove full program correctness.
|
|
135
|
+
- Complex dynamic imports may be missed.
|
|
136
|
+
- Dependency checks depend on available project metadata.
|
|
137
|
+
- Suggested fixes require review before applying.
|
|
138
|
+
- External repository findings are treated as candidates until manually reviewed.
|
|
139
|
+
|
|
140
|
+
## Integrations
|
|
141
|
+
|
|
142
|
+
Pre-commit:
|
|
143
|
+
|
|
144
|
+
```yaml
|
|
145
|
+
repos:
  - repo: https://github.com/mathew-felix/llm-code-validator
    rev: v0.1.0
    hooks:
      - id: llm-code-validator
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
GitHub Actions:
|
|
153
|
+
|
|
154
|
+
```yaml
|
|
155
|
+
- run: pip install llm-code-validator
|
|
156
|
+
- run: llm-code-validator check . --format github
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Development
|
|
160
|
+
|
|
161
|
+
Run tests:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pytest -q
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Current local result:
|
|
168
|
+
|
|
169
|
+
```text
|
|
170
|
+
74 passed
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Run benchmarks:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
python -m llm_code_validator.benchmark --dataset validation_dataset/cli_benchmark_cases.json
|
|
177
|
+
python -m llm_code_validator.benchmark --dataset validation_dataset/ai_stack_benchmark_cases.json
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## More Details
|
|
181
|
+
|
|
182
|
+
- `docs/demo.md`: command walkthrough
|
|
183
|
+
- `docs/accuracy.md`: benchmark and external-review notes
|
|
184
|
+
- `docs/rules.md`: rule database notes
|
|
185
|
+
- `docs/security.md`: local-only, AI-review, and policy controls
|
|
186
|
+
- `docs/ai-review.md`: optional AI-review roadmap and candidate-rule workflow
|
|
187
|
+
- `docs/release.md`: release steps
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# llm-code-validator
|
|
2
|
+
|
|
3
|
+
Python CLI for checking dependency-heavy Python projects for stale or version-incompatible third-party API usage before commit or CI.
|
|
4
|
+
|
|
5
|
+
It parses Python files with `ast`, checks imports and calls against a maintained API-drift rule database, and reports issues before runtime.
|
|
6
|
+
|
|
7
|
+
Default checks are local-only. No OpenAI, Anthropic, or other LLM API key is required, and the tool does not make network calls in normal use.
|
|
8
|
+
|
|
9
|
+
Current local validation: 74 tests passing, 68 API-drift rules, and PyPI install verified.
|
|
10
|
+
|
|
11
|
+
PyPI: https://pypi.org/project/llm-code-validator/
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install llm-code-validator
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
For local development:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
git clone https://github.com/mathew-felix/llm-code-validator
|
|
25
|
+
cd llm-code-validator
|
|
26
|
+
pip install -e ".[dev]"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Quick Use
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
llm-code-validator check file.py
|
|
33
|
+
llm-code-validator check src/
|
|
34
|
+
llm-code-validator check --staged
|
|
35
|
+
llm-code-validator check src/ --format json
|
|
36
|
+
llm-code-validator check src/ --format github
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Exit codes:
|
|
40
|
+
|
|
41
|
+
- `0`: no diagnostics
|
|
42
|
+
- `1`: diagnostics found
|
|
43
|
+
- `2`: tool error
|
|
44
|
+
|
|
45
|
+
## Example
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
49
|
+
|
|
50
|
+
Base = declarative_base()
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
llm-code-validator check app.py
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
```text
|
|
58
|
+
app.py:1 LCV001 warning sqlalchemy.declarative_base sqlalchemy.declarative_base is incompatible with sqlalchemy>=2.0.0
|
|
59
|
+
fix: from sqlalchemy.orm import declarative_base
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Preview or apply safe fixes:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
llm-code-validator fix app.py
|
|
66
|
+
llm-code-validator fix app.py --write
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## What It Checks
|
|
70
|
+
|
|
71
|
+
Current rule database:
|
|
72
|
+
|
|
73
|
+
- 68 API-drift rules
|
|
74
|
+
- 15 safe fixes
|
|
75
|
+
- Rules for OpenAI, Anthropic, LangChain, LangGraph, LlamaIndex, Pinecone, ChromaDB, FastAPI, Pydantic, pandas, NumPy, SQLAlchemy, Torch, and Transformers
|
|
76
|
+
|
|
77
|
+
Validate the rule database:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
llm-code-validator validate-signatures
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
This checks source-level API migration patterns. It does not replace Ruff for linting, mypy for type checking, pip-audit for vulnerability checks, or Dependabot for dependency updates.
|
|
84
|
+
|
|
85
|
+
## Security Model
|
|
86
|
+
|
|
87
|
+
By default, `llm-code-validator` reads local Python files, parses them with Python's built-in `ast` module, and compares imports and calls with the bundled rule database. It does not send source code, dependency files, environment variables, or secrets to any external service.
|
|
88
|
+
|
|
89
|
+
If optional AI-assisted review is added in the future, it should remain explicit opt-in and should minimize and redact any code snippets before a provider request.
|
|
90
|
+
|
|
91
|
+
## Rule Maintenance
|
|
92
|
+
|
|
93
|
+
Public rules are reviewed before release. New rules should be added to `data/library_signatures.json`, backed by official evidence such as migration guides, release notes, official docs, or maintainer discussions, and covered by a test or benchmark case.
|
|
94
|
+
|
|
95
|
+
The packaged PyPI wheel includes `llm_code_validator/library_signatures.json`, so users receive reviewed rule updates by upgrading the package:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pip install --upgrade llm-code-validator
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Use `docs/rules.md` for the contribution workflow and `docs/release.md` for release verification.
|
|
102
|
+
|
|
103
|
+
## Limitations
|
|
104
|
+
|
|
105
|
+
- Detects known API-drift rules only.
|
|
106
|
+
- Does not detect every possible Python, dependency, security, or runtime issue.
|
|
107
|
+
- Does not prove full program correctness.
|
|
108
|
+
- Complex dynamic imports may be missed.
|
|
109
|
+
- Dependency checks depend on available project metadata.
|
|
110
|
+
- Suggested fixes require review before applying.
|
|
111
|
+
- External repository findings are treated as candidates until manually reviewed.
|
|
112
|
+
|
|
113
|
+
## Integrations
|
|
114
|
+
|
|
115
|
+
Pre-commit:
|
|
116
|
+
|
|
117
|
+
```yaml
|
|
118
|
+
repos:
  - repo: https://github.com/mathew-felix/llm-code-validator
    rev: v0.1.0
    hooks:
      - id: llm-code-validator
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
GitHub Actions:
|
|
126
|
+
|
|
127
|
+
```yaml
|
|
128
|
+
- run: pip install llm-code-validator
|
|
129
|
+
- run: llm-code-validator check . --format github
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Development
|
|
133
|
+
|
|
134
|
+
Run tests:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
pytest -q
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Current local result:
|
|
141
|
+
|
|
142
|
+
```text
|
|
143
|
+
74 passed
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Run benchmarks:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
python -m llm_code_validator.benchmark --dataset validation_dataset/cli_benchmark_cases.json
|
|
150
|
+
python -m llm_code_validator.benchmark --dataset validation_dataset/ai_stack_benchmark_cases.json
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## More Details
|
|
154
|
+
|
|
155
|
+
- `docs/demo.md`: command walkthrough
|
|
156
|
+
- `docs/accuracy.md`: benchmark and external-review notes
|
|
157
|
+
- `docs/rules.md`: rule database notes
|
|
158
|
+
- `docs/security.md`: local-only, AI-review, and policy controls
|
|
159
|
+
- `docs/ai-review.md`: optional AI-review roadmap and candidate-rule workflow
|
|
160
|
+
- `docs/release.md`: release steps
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from .core import EXCLUDED_DIR_NAMES, iter_python_files
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# File names that is_secret_path() treats as secret; compared against the
# lower-cased final path component.
SECRET_FILE_NAMES = {
    ".env", ".env.local", ".env.production",
    "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519",
}

# Lower-cased path components that mark a path as secret in is_secret_path().
SECRET_NAME_PARTS = {
    "secret",
    "secrets",
    "credential",
    "credentials",
    "token",
    "private-key",
}

# Patterns applied in order by redact_secrets(); each match is rewritten by
# _redaction().
SECRET_PATTERNS = [
    # "sk-" followed by at least 12 letters, digits, "_" or "-".
    re.compile(r"sk-[A-Za-z0-9_-]{12,}"),
    # A quoted value assigned to a name ending in api key / secret / token /
    # password (case-insensitive).
    re.compile(r"(?i)(api[_-]?key|secret|token|password)\s*=\s*['\"][^'\"]+['\"]"),
    # "Authorization: Bearer <value>"; group 1 captures the header prefix.
    re.compile(r"(?i)(authorization:\s*bearer\s+)[A-Za-z0-9._~+/=-]+"),
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class ProviderConfig:
    """Immutable description of an AI review provider.

    ``configured`` reports whether the settings needed by that provider
    are present.
    """

    # Provider identifier; the value "local" makes `configured` look at
    # `endpoint` instead of an API key.
    provider: str
    # Name of the environment variable expected to hold the API key, if any.
    api_key_env: str | None
    # Endpoint value; only consulted by `configured` for the "local" provider.
    endpoint: str | None = None

    @property
    def configured(self) -> bool:
        """Return True when the provider has the settings it needs.

        The "local" provider needs a non-empty ``endpoint``. Any other
        provider needs ``api_key_env`` to name an environment variable
        that is set to a non-empty value.
        """
        if self.provider != "local":
            env_name = self.api_key_env
            if not env_name:
                return False
            return bool(os.getenv(env_name))
        return bool(self.endpoint)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def default_key_env(provider: str) -> str | None:
    """Return the default API-key environment variable name for ``provider``.

    Known providers are "openai", "anthropic" and "azure-openai"; any other
    value yields ``None``.
    """
    env_names = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "azure-openai": "AZURE_OPENAI_API_KEY",
    }
    return env_names.get(provider)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def redact_secrets(text: str) -> str:
    """Return ``text`` with every SECRET_PATTERNS match rewritten.

    The patterns are applied one after another, each to the output of the
    previous one, using ``_redaction`` to build the replacement.
    """
    for compiled in SECRET_PATTERNS:
        text = compiled.sub(_redaction, text)
    return text
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _redaction(match: re.Match[str]) -> str:
|
|
62
|
+
value = match.group(0)
|
|
63
|
+
if value.lower().startswith("authorization:"):
|
|
64
|
+
return f"{match.group(1)}[REDACTED]"
|
|
65
|
+
if "=" in value:
|
|
66
|
+
name = value.split("=", 1)[0].strip()
|
|
67
|
+
return f"{name} = \"[REDACTED]\""
|
|
68
|
+
return "[REDACTED]"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def is_secret_path(path: Path) -> bool:
    """Return True when ``path`` should be kept out of an AI payload.

    A path qualifies when any lower-cased component is in
    EXCLUDED_DIR_NAMES or SECRET_NAME_PARTS, or when its lower-cased file
    name is in SECRET_FILE_NAMES.
    """
    components = {component.lower() for component in path.parts}
    if not components.isdisjoint(EXCLUDED_DIR_NAMES):
        return True
    if path.name.lower() in SECRET_FILE_NAMES:
        return True
    # NOTE(review): components are compared whole, so a file such as
    # "secrets.py" does not match "secrets" -- confirm this is intended.
    return not components.isdisjoint(SECRET_NAME_PARTS)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _extract_relevant_lines(source: str, max_snippet_lines: int) -> list[str]:
|
|
81
|
+
relevant: list[str] = []
|
|
82
|
+
for line in source.splitlines():
|
|
83
|
+
stripped = line.strip()
|
|
84
|
+
if (
|
|
85
|
+
stripped.startswith("import ")
|
|
86
|
+
or stripped.startswith("from ")
|
|
87
|
+
or any(name in stripped.lower() for name in ("key", "secret", "token", "password", "authorization"))
|
|
88
|
+
or "(" in stripped
|
|
89
|
+
or "." in stripped
|
|
90
|
+
or "@" in stripped
|
|
91
|
+
):
|
|
92
|
+
relevant.append(line)
|
|
93
|
+
if len(relevant) >= max_snippet_lines:
|
|
94
|
+
break
|
|
95
|
+
return relevant
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def build_ai_payload(
    paths: list[str],
    *,
    max_snippet_lines: int = 30,
    redact: bool = True,
) -> dict[str, object]:
    """Assemble the advisory-review payload for the Python files under ``paths``.

    Files for which ``is_secret_path`` is true are skipped. Every other file
    contributes one entry holding its path and a snippet made of the lines
    chosen by ``_extract_relevant_lines``; the snippet is passed through
    ``redact_secrets`` unless ``redact`` is false.

    Args:
        paths: Files or directories handed to ``iter_python_files``.
        max_snippet_lines: Per-file cap on snippet lines.
        redact: Whether to redact secrets in each snippet.

    Returns:
        A dict with the keys "purpose", "files" and "instructions".
    """
    entries = []
    for candidate in iter_python_files(paths):
        if is_secret_path(candidate):
            continue
        # Decoding with errors="replace" yields the same text as a strict
        # UTF-8 read whenever the file is valid UTF-8, and the replacement
        # form otherwise.
        text = candidate.read_text(encoding="utf-8", errors="replace")
        chosen = _extract_relevant_lines(text, max_snippet_lines)
        snippet = "\n".join(chosen)
        entries.append(
            {
                "path": str(candidate),
                "snippet": redact_secrets(snippet) if redact else snippet,
            }
        )

    instructions = (
        "Review only the provided minimized snippets for stale third-party API usage. "
        "Return advisory findings and candidate rules; do not assume full program context."
    )
    return {
        "purpose": "advisory API-drift review",
        "files": entries,
        "instructions": instructions,
    }
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def render_ai_payload(payload: dict[str, object]) -> str:
    """Serialize ``payload`` as JSON with two-space indentation and sorted keys."""
    dump_options = {"indent": 2, "sort_keys": True}
    return json.dumps(payload, **dump_options)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def write_ai_audit_log(path: str | Path, provider: str, payload: dict[str, object]) -> None:
    """Append one JSON line describing an AI review payload to ``path``.

    The record holds a UTC ISO-8601 timestamp, the provider name, the number
    of entries in ``payload["files"]`` (0 when that value is not a list), the
    payload's "purpose", and a ``contains_source_snippets`` flag that is
    always written as false. Missing parent directories are created.

    Args:
        path: Log file to append to.
        provider: Provider name stored in the record.
        payload: Payload whose "files" and "purpose" entries are summarized.
    """
    listed = payload.get("files", [])
    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "provider": provider,
        "file_count": len(listed) if isinstance(listed, list) else 0,
        "payload_type": payload.get("purpose", "advisory API-drift review"),
        "contains_source_snippets": False,
    }

    log_path = Path(path)
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as log_file:
        log_file.write(json.dumps(entry, sort_keys=True) + "\n")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def validate_ai_provider(config: ProviderConfig, no_network: bool) -> None:
    """Raise ``RuntimeError`` when an AI review cannot go ahead.

    Args:
        config: Provider settings to check.
        no_network: When true, provider calls are refused outright.

    Raises:
        RuntimeError: If ``no_network`` is set, or if ``config`` is not
            configured (the message names the missing endpoint option for
            the "local" provider, or the API-key variable otherwise).
    """
    if no_network:
        raise RuntimeError("--no-network prevents AI review provider calls")
    if config.configured:
        return
    if config.provider == "local":
        raise RuntimeError("--ai-provider local requires --ai-endpoint")
    missing = config.api_key_env or "provider API key environment variable"
    raise RuntimeError(f"--ai-review requires {missing} to be set")
|