overllm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- overllm-0.1.0/.gitignore +14 -0
- overllm-0.1.0/.pre-commit-hooks.yaml +7 -0
- overllm-0.1.0/LICENSE +21 -0
- overllm-0.1.0/PKG-INFO +144 -0
- overllm-0.1.0/README.md +124 -0
- overllm-0.1.0/action.yml +71 -0
- overllm-0.1.0/examples/needless_llm_calls.py +53 -0
- overllm-0.1.0/pyproject.toml +39 -0
- overllm-0.1.0/src/overllm/__init__.py +11 -0
- overllm-0.1.0/src/overllm/__main__.py +4 -0
- overllm-0.1.0/src/overllm/analyze.py +118 -0
- overllm-0.1.0/src/overllm/cli.py +64 -0
- overllm-0.1.0/src/overllm/config.py +66 -0
- overllm-0.1.0/src/overllm/detector.py +435 -0
- overllm-0.1.0/src/overllm/models.py +29 -0
- overllm-0.1.0/src/overllm/report.py +157 -0
- overllm-0.1.0/src/overllm/rules.py +137 -0
- overllm-0.1.0/tests/test_cli.py +95 -0
- overllm-0.1.0/tests/test_rules.py +146 -0
overllm-0.1.0/.gitignore
ADDED
overllm-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Adam Danielsson
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
overllm-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: overllm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Catch the LLM/AI calls you didn't need. A fast, deterministic linter that flags LLM API calls where plain code is simpler, cheaper, and more reliable.
|
|
5
|
+
Project-URL: Homepage, https://github.com/theadamdanielsson/overllm
|
|
6
|
+
Project-URL: Issues, https://github.com/theadamdanielsson/overllm/issues
|
|
7
|
+
Author-email: Adam Danielsson <the.adam.danielsson@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ai,anthropic,code-quality,cost,linter,llm,openai,pre-commit,static-analysis
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# overllm
|
|
22
|
+
|
|
23
|
+
**Catch the LLM/AI calls you didn't need.**
|
|
24
|
+
|
|
25
|
+
overllm is a small, fast linter with one job: find the places in your code where you call an AI model to do something plain code does better. You called GPT to parse a date. You called a model to extract JSON that `json.loads` already handles. You are paying latency, money, and nondeterminism for a regex.
|
|
26
|
+
|
|
27
|
+
It reads your code with Python's own `ast` module. No model runs, no network, no API key. Same code in, same result out. Fast enough for a pre-commit hook.
|
|
28
|
+
|
|
29
|
+
Everyone else lints the code the AI wrote. overllm catches where you are paying an AI to do what a library already does.
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install overllm
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Use it
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
overllm . # scan the current project
|
|
41
|
+
overllm src/ # scan a folder
|
|
42
|
+
overllm app.py # scan one file
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Example output:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
app.py:42:5 llm-mechanical LLM call asks the model to sort
|
|
49
|
+
resp = client.chat.completions.create(model="gpt-4o", messages=[...])
|
|
50
|
+
-> use sorted()
|
|
51
|
+
|
|
52
|
+
app.py:88:1 llm-in-loop LLM call inside a loop: one API round-trip per iteration
|
|
53
|
+
completion(model="gpt-4o", messages=[{"role": "user", "content": f"tag {x}"}])
|
|
54
|
+
-> batch the inputs into a single call, cache repeated results, or use a function
|
|
55
|
+
|
|
56
|
+
2 needless LLM calls in 1 file.
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
overllm exits non-zero when it finds something, so it gates a commit or a CI check. Pass `--exit-zero` to report without failing.
|
|
60
|
+
|
|
61
|
+
## Rules
|
|
62
|
+
|
|
63
|
+
Every rule fires only on a concrete code pattern, and every finding names the deterministic replacement. It stays silent when it is not sure.
|
|
64
|
+
|
|
65
|
+
| Rule | Fires when | Suggests |
|
|
66
|
+
| --- | --- | --- |
|
|
67
|
+
| `static-prompt` | The user prompt is a compile-time constant (no variables). The input is fixed, so the call buys nothing. | precompute or cache the result |
|
|
68
|
+
| `llm-extraction` | The prompt asks the model to extract or parse an email, URL, date, number, or JSON. | a regex, `json`, `datetime`, or the SDK's structured-output mode |
|
|
69
|
+
| `llm-mechanical` | The prompt asks for a mechanical transform: sort, reverse, count, sum, deduplicate, change case, base64, arithmetic on literals. | the one-line stdlib equivalent |
|
|
70
|
+
| `llm-in-loop` | An LLM call sits inside a `for`/`async for`/comprehension. One API round-trip per iteration. | batch, cache, or move it out of the loop |
|
|
71
|
+
|
|
72
|
+
It detects calls to the OpenAI, Anthropic, Google, Mistral, Cohere, Groq, LangChain, LiteLLM, and Ollama SDKs, and raw HTTP requests to those hosts.
|
|
73
|
+
|
|
74
|
+
## Silence a false positive
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
resp = client.chat.completions.create(...) # overllm: ignore
|
|
78
|
+
resp = client.chat.completions.create(...) # overllm: ignore=llm-in-loop
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Put `# overllm: ignore-file` at the top of a file to skip the whole file.
|
|
82
|
+
|
|
83
|
+
## Configure
|
|
84
|
+
|
|
85
|
+
In `pyproject.toml` (Python 3.11+):
|
|
86
|
+
|
|
87
|
+
```toml
|
|
88
|
+
[tool.overllm]
|
|
89
|
+
ignore = ["llm-in-loop"]
|
|
90
|
+
exclude = ["examples/", "migrations/"]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Or on the command line: `--select`, `--ignore`, and `--config PATH`. Exclusions are set through the config file's `exclude` key.
|
|
94
|
+
|
|
95
|
+
## Pre-commit hook
|
|
96
|
+
|
|
97
|
+
In `.pre-commit-config.yaml`:
|
|
98
|
+
|
|
99
|
+
```yaml
|
|
100
|
+
repos:
|
|
101
|
+
- repo: https://github.com/theadamdanielsson/overllm
|
|
102
|
+
rev: v0.1.0
|
|
103
|
+
hooks:
|
|
104
|
+
- id: overllm
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## GitHub Action
|
|
108
|
+
|
|
109
|
+
overllm ships an Action that scans a pull request and leaves one grounded comment. It stays silent when there is nothing to say.
|
|
110
|
+
|
|
111
|
+
```yaml
|
|
112
|
+
name: overllm
|
|
113
|
+
on:
|
|
114
|
+
pull_request:
|
|
115
|
+
|
|
116
|
+
permissions:
|
|
117
|
+
contents: read
|
|
118
|
+
pull-requests: write
|
|
119
|
+
|
|
120
|
+
jobs:
|
|
121
|
+
check:
|
|
122
|
+
runs-on: ubuntu-latest
|
|
123
|
+
steps:
|
|
124
|
+
- uses: actions/checkout@v4
|
|
125
|
+
- uses: theadamdanielsson/overllm@v1
|
|
126
|
+
with:
|
|
127
|
+
paths: "."
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Other output formats
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
overllm --format json . # machine-readable
|
|
134
|
+
overllm --format sarif . # upload to GitHub code scanning
|
|
135
|
+
overllm --format markdown . # the PR-comment body
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Why not just use an AI code reviewer?
|
|
139
|
+
|
|
140
|
+
AI reviewers and AI-slop linters look at the code the model produced: comments, dead code, structure. None of them ask the question overllm asks, which is whether you needed the model at all. It is a different axis, and it is one plain static analysis can answer with high precision and zero cost.
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
MIT © Adam Danielsson
|
overllm-0.1.0/README.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# overllm
|
|
2
|
+
|
|
3
|
+
**Catch the LLM/AI calls you didn't need.**
|
|
4
|
+
|
|
5
|
+
overllm is a small, fast linter with one job: find the places in your code where you call an AI model to do something plain code does better. You called GPT to parse a date. You called a model to extract JSON that `json.loads` already handles. You are paying latency, money, and nondeterminism for a regex.
|
|
6
|
+
|
|
7
|
+
It reads your code with Python's own `ast` module. No model runs, no network, no API key. Same code in, same result out. Fast enough for a pre-commit hook.
|
|
8
|
+
|
|
9
|
+
Everyone else lints the code the AI wrote. overllm catches where you are paying an AI to do what a library already does.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install overllm
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Use it
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
overllm . # scan the current project
|
|
21
|
+
overllm src/ # scan a folder
|
|
22
|
+
overllm app.py # scan one file
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Example output:
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
app.py:42:5 llm-mechanical LLM call asks the model to sort
|
|
29
|
+
resp = client.chat.completions.create(model="gpt-4o", messages=[...])
|
|
30
|
+
-> use sorted()
|
|
31
|
+
|
|
32
|
+
app.py:88:1 llm-in-loop LLM call inside a loop: one API round-trip per iteration
|
|
33
|
+
completion(model="gpt-4o", messages=[{"role": "user", "content": f"tag {x}"}])
|
|
34
|
+
-> batch the inputs into a single call, cache repeated results, or use a function
|
|
35
|
+
|
|
36
|
+
2 needless LLM calls in 1 file.
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
overllm exits non-zero when it finds something, so it gates a commit or a CI check. Pass `--exit-zero` to report without failing.
|
|
40
|
+
|
|
41
|
+
## Rules
|
|
42
|
+
|
|
43
|
+
Every rule fires only on a concrete code pattern, and every finding names the deterministic replacement. It stays silent when it is not sure.
|
|
44
|
+
|
|
45
|
+
| Rule | Fires when | Suggests |
|
|
46
|
+
| --- | --- | --- |
|
|
47
|
+
| `static-prompt` | The user prompt is a compile-time constant (no variables). The input is fixed, so the call buys nothing. | precompute or cache the result |
|
|
48
|
+
| `llm-extraction` | The prompt asks the model to extract or parse an email, URL, date, number, or JSON. | a regex, `json`, `datetime`, or the SDK's structured-output mode |
|
|
49
|
+
| `llm-mechanical` | The prompt asks for a mechanical transform: sort, reverse, count, sum, deduplicate, change case, base64, arithmetic on literals. | the one-line stdlib equivalent |
|
|
50
|
+
| `llm-in-loop` | An LLM call sits inside a `for`/`async for`/comprehension. One API round-trip per iteration. | batch, cache, or move it out of the loop |
|
|
51
|
+
|
|
52
|
+
It detects calls to the OpenAI, Anthropic, Google, Mistral, Cohere, Groq, LangChain, LiteLLM, and Ollama SDKs, and raw HTTP requests to those hosts.
|
|
53
|
+
|
|
54
|
+
## Silence a false positive
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
resp = client.chat.completions.create(...) # overllm: ignore
|
|
58
|
+
resp = client.chat.completions.create(...) # overllm: ignore=llm-in-loop
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Put `# overllm: ignore-file` at the top of a file to skip the whole file.
|
|
62
|
+
|
|
63
|
+
## Configure
|
|
64
|
+
|
|
65
|
+
In `pyproject.toml` (Python 3.11+):
|
|
66
|
+
|
|
67
|
+
```toml
|
|
68
|
+
[tool.overllm]
|
|
69
|
+
ignore = ["llm-in-loop"]
|
|
70
|
+
exclude = ["examples/", "migrations/"]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Or on the command line: `--select`, `--ignore`, and `--config PATH`. Exclusions are set through the config file's `exclude` key.
|
|
74
|
+
|
|
75
|
+
## Pre-commit hook
|
|
76
|
+
|
|
77
|
+
In `.pre-commit-config.yaml`:
|
|
78
|
+
|
|
79
|
+
```yaml
|
|
80
|
+
repos:
|
|
81
|
+
- repo: https://github.com/theadamdanielsson/overllm
|
|
82
|
+
rev: v0.1.0
|
|
83
|
+
hooks:
|
|
84
|
+
- id: overllm
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## GitHub Action
|
|
88
|
+
|
|
89
|
+
overllm ships an Action that scans a pull request and leaves one grounded comment. It stays silent when there is nothing to say.
|
|
90
|
+
|
|
91
|
+
```yaml
|
|
92
|
+
name: overllm
|
|
93
|
+
on:
|
|
94
|
+
pull_request:
|
|
95
|
+
|
|
96
|
+
permissions:
|
|
97
|
+
contents: read
|
|
98
|
+
pull-requests: write
|
|
99
|
+
|
|
100
|
+
jobs:
|
|
101
|
+
check:
|
|
102
|
+
runs-on: ubuntu-latest
|
|
103
|
+
steps:
|
|
104
|
+
- uses: actions/checkout@v4
|
|
105
|
+
- uses: theadamdanielsson/overllm@v1
|
|
106
|
+
with:
|
|
107
|
+
paths: "."
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Other output formats
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
overllm --format json . # machine-readable
|
|
114
|
+
overllm --format sarif . # upload to GitHub code scanning
|
|
115
|
+
overllm --format markdown . # the PR-comment body
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Why not just use an AI code reviewer?
|
|
119
|
+
|
|
120
|
+
AI reviewers and AI-slop linters look at the code the model produced: comments, dead code, structure. None of them ask the question overllm asks, which is whether you needed the model at all. It is a different axis, and it is one plain static analysis can answer with high precision and zero cost.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
MIT © Adam Danielsson
|
overllm-0.1.0/action.yml
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
name: "overllm"
|
|
2
|
+
description: "Flag LLM/AI calls where deterministic code is simpler. Posts one grounded PR comment, silent when clean."
|
|
3
|
+
author: "Adam Danielsson"
|
|
4
|
+
branding:
|
|
5
|
+
icon: "scissors"
|
|
6
|
+
color: "yellow"
|
|
7
|
+
|
|
8
|
+
inputs:
|
|
9
|
+
paths:
|
|
10
|
+
description: "Files or directories to scan."
|
|
11
|
+
required: false
|
|
12
|
+
default: "."
|
|
13
|
+
fail-on-findings:
|
|
14
|
+
description: "Fail the check (exit non-zero) when findings exist."
|
|
15
|
+
required: false
|
|
16
|
+
default: "true"
|
|
17
|
+
comment:
|
|
18
|
+
description: "Post a single grounded comment on the pull request."
|
|
19
|
+
required: false
|
|
20
|
+
default: "true"
|
|
21
|
+
github-token:
|
|
22
|
+
description: "Token used to post the PR comment. Needs pull-requests: write."
|
|
23
|
+
required: false
|
|
24
|
+
default: ${{ github.token }}
|
|
25
|
+
|
|
26
|
+
runs:
|
|
27
|
+
using: "composite"
|
|
28
|
+
steps:
|
|
29
|
+
- name: Install overllm
|
|
30
|
+
shell: bash
|
|
31
|
+
run: |
|
|
32
|
+
python3 -m venv "$RUNNER_TEMP/overllm-venv"
|
|
33
|
+
"$RUNNER_TEMP/overllm-venv/bin/python" -m pip install -q --upgrade pip
|
|
34
|
+
"$RUNNER_TEMP/overllm-venv/bin/python" -m pip install -q "$GITHUB_ACTION_PATH"
|
|
35
|
+
|
|
36
|
+
- name: Run overllm
  id: scan
  shell: bash
  env:
    # Hand the input over through the environment instead of interpolating it
    # into the script text, so its contents are never parsed as shell syntax.
    OVERLLM_PATHS: ${{ inputs.paths }}
  run: |
    BIN="$RUNNER_TEMP/overllm-venv/bin/overllm"
    set +e
    # $OVERLLM_PATHS is intentionally unquoted: it may list several paths.
    "$BIN" --format markdown $OVERLLM_PATHS > "$RUNNER_TEMP/overllm.md"
    status=$?
    echo "found=$status" >> "$GITHUB_OUTPUT"
    "$BIN" $OVERLLM_PATHS --no-color
    # 0 = clean, 1 = findings; anything else means the scan itself broke
    # (e.g. a usage error), which must not pass as a clean run.
    if [ "$status" -gt 1 ]; then
      echo "::error::overllm exited with unexpected status $status"
      exit "$status"
    fi
    exit 0
|
|
46
|
+
|
|
47
|
+
- name: Comment on PR
|
|
48
|
+
if: ${{ inputs.comment == 'true' && github.event_name == 'pull_request' && steps.scan.outputs.found == '1' }}
|
|
49
|
+
shell: bash
|
|
50
|
+
env:
|
|
51
|
+
GH_TOKEN: ${{ inputs.github-token }}
|
|
52
|
+
PR: ${{ github.event.pull_request.number }}
|
|
53
|
+
REPO: ${{ github.repository }}
|
|
54
|
+
run: |
|
|
55
|
+
marker="<!-- overllm-report -->"
|
|
56
|
+
existing=$(gh api "repos/$REPO/issues/$PR/comments" --paginate \
|
|
57
|
+
--jq ".[] | select(.body | contains(\"$marker\")) | .id" | head -1)
|
|
58
|
+
if [ -n "$existing" ]; then
|
|
59
|
+
gh api -X PATCH "repos/$REPO/issues/comments/$existing" \
|
|
60
|
+
-F body=@"$RUNNER_TEMP/overllm.md" >/dev/null
|
|
61
|
+
else
|
|
62
|
+
gh api -X POST "repos/$REPO/issues/$PR/comments" \
|
|
63
|
+
-F body=@"$RUNNER_TEMP/overllm.md" >/dev/null
|
|
64
|
+
fi
|
|
65
|
+
|
|
66
|
+
- name: Fail if findings
|
|
67
|
+
if: ${{ inputs.fail-on-findings == 'true' && steps.scan.outputs.found == '1' }}
|
|
68
|
+
shell: bash
|
|
69
|
+
run: |
|
|
70
|
+
echo "::error::overllm found LLM calls that plain code could handle. See the job log or PR comment."
|
|
71
|
+
exit 1
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Examples of LLM calls that plain code should handle. overllm flags every one.
|
|
2
|
+
|
|
3
|
+
The last function is a legitimate call that overllm leaves alone.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from openai import OpenAI
|
|
7
|
+
from litellm import completion
|
|
8
|
+
from langchain_openai import ChatOpenAI
|
|
9
|
+
|
|
10
|
+
client = OpenAI()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def greeting():
    # static-prompt: the user prompt is a string literal with no variables, so the call buys nothing
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Write a short friendly greeting."}],
    )
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_email(text):
    # llm-extraction: the prompt asks the model to extract an email address; that is a regex, not a model call
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": f"Extract the email address from: {text}"}],
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def sort_names(names):
    # llm-mechanical: the prompt asks for an alphabetical sort, which is sorted()
    llm = ChatOpenAI(model="gpt-4o")
    return llm.invoke(f"Sort these names alphabetically: {names}")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def tag_all(items):
    # llm-in-loop: completion() is called inside the for loop, so one API round-trip per item
    tags = []
    for item in items:
        tags.append(
            completion(model="gpt-4o", messages=[{"role": "user", "content": f"Tag this: {item}"}])
        )
    return tags
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def draft_reply(message):
    # legitimate: open-ended generation over real input (the customer's message). overllm stays silent.
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a warm, concise support agent."},
            {"role": "user", "content": f"The customer wrote: {message}. Draft a reply."},
        ],
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "overllm"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Catch the LLM/AI calls you didn't need. A fast, deterministic linter that flags LLM API calls where plain code is simpler, cheaper, and more reliable."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [{ name = "Adam Danielsson", email = "the.adam.danielsson@gmail.com" }]
|
|
13
|
+
keywords = ["linter", "llm", "ai", "static-analysis", "pre-commit", "openai", "anthropic", "cost", "code-quality"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
19
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
20
|
+
]
|
|
21
|
+
dependencies = []
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/theadamdanielsson/overllm"
|
|
25
|
+
Issues = "https://github.com/theadamdanielsson/overllm/issues"
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
overllm = "overllm.cli:main"
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = ["pytest>=7"]
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = ["src/overllm"]
|
|
35
|
+
|
|
36
|
+
[tool.overllm]
|
|
37
|
+
# example config; all optional
|
|
38
|
+
# ignore = ["llm-in-loop"]
|
|
39
|
+
# exclude = ["examples/", "tests/fixtures/"]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""overllm - catch the LLM/AI calls you didn't need.
|
|
2
|
+
|
|
3
|
+
A fast, deterministic (no-LLM) linter that flags LLM API calls where plain,
|
|
4
|
+
cheaper, more reliable code would do the same job. Built on Python's own `ast`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
|
|
9
|
+
from .models import Finding
|
|
10
|
+
|
|
11
|
+
__all__ = ["Finding", "__version__"]
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Walk files, parse them, run rules, and apply suppression + config."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .config import DEFAULT_EXCLUDES, Config
|
|
10
|
+
from .detector import find_llm_calls
|
|
11
|
+
from .models import Finding
|
|
12
|
+
from .rules import run_rules
|
|
13
|
+
|
|
14
|
+
# Line-level suppression: `# overllm: ignore` or `# overllm: ignore=rule-a,rule-b`.
# The negative lookahead stops longer words that merely start with "ignore"
# (notably `ignore-file`) from being read as a blanket per-line ignore.
_IGNORE_RE = re.compile(r"#\s*overllm:\s*ignore(?![\w\-])(?:=([\w\-,\s]+))?")
# File-level suppression: `# overllm: ignore-file`.
_IGNORE_FILE_RE = re.compile(r"#\s*overllm:\s*ignore-file\b")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def iter_python_files(paths: list[str], excludes: tuple[str, ...]) -> list[Path]:
    """Collect the ``.py`` files to scan, in order and without duplicates.

    Args:
        paths: Files or directories. A file is kept only when its suffix is
            ``.py``; a directory is searched recursively for ``*.py``.
            Entries that are neither a file nor a directory are ignored.
        excludes: Substrings matched against the path text. A path containing
            any non-empty entry is skipped.

    Returns:
        The surviving paths in discovery order (directory contents sorted),
        deduplicated by resolved location.
    """
    # Built once: a path is skipped when any component is a default exclude.
    default_parts = frozenset(DEFAULT_EXCLUDES)
    # Empty strings would match every path, so they are dropped up front.
    patterns = tuple(ex for ex in excludes if ex)

    def excluded(p: Path) -> bool:
        if any(part in default_parts for part in p.parts):
            return True
        text = str(p)
        return any(ex in text for ex in patterns)

    found: list[Path] = []
    seen: set[str] = set()
    for raw in paths:
        p = Path(raw)
        if p.is_file():
            candidates = [p] if p.suffix == ".py" else []
        elif p.is_dir():
            candidates = sorted(p.rglob("*.py"))
        else:
            candidates = []
        for f in candidates:
            if excluded(f):
                continue
            # The same file can be reached through several arguments; the
            # first spelling encountered is the one kept.
            key = str(f.resolve())
            if key not in seen:
                seen.add(key)
                found.append(f)
    return found
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _line_directives(lines: list[str]) -> tuple[bool, dict[int, set[str] | None]]:
    """Scan source lines for overllm suppression comments.

    Returns ``(ignore_whole_file, per_line)``. ``per_line`` maps a 1-based
    line number to ``None`` (suppress every rule on that line) or to the set
    of rule ids listed after ``ignore=``.
    """
    whole_file = False
    directives: dict[int, set[str] | None] = {}
    for lineno, text in enumerate(lines, start=1):
        whole_file = whole_file or bool(_IGNORE_FILE_RE.search(text))
        match = _IGNORE_RE.search(text)
        if match is None:
            continue
        listed = match.group(1)
        # No rule list after `ignore` means "everything on this line".
        directives[lineno] = (
            {name.strip() for name in listed.split(",") if name.strip()} if listed else None
        )
    return whole_file, directives
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _suppressed(finding: Finding, node_lines: range, per_line: dict[int, set[str] | None]) -> bool:
    """Tell whether an ignore directive covers ``finding``.

    A directive counts when it sits on any physical line of the call, on the
    finding's own line, or on the line directly above it. A ``None`` entry
    suppresses every rule; a set suppresses only the rules it names.
    """
    nearby = {finding.line - 1, finding.line, *node_lines}
    return any(
        per_line[lineno] is None or finding.rule in per_line[lineno]
        for lineno in nearby
        if lineno in per_line
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def analyze_file(path: Path, config: Config) -> list[Finding]:
    """Run the rules over one file and return the findings that survive.

    A file that cannot be read or decoded as UTF-8, that fails to parse, or
    that carries an ignore-file directive yields an empty list. Findings are
    dropped when their rule is disabled in ``config``, when an identical
    ``key`` was already reported, or when an ignore directive covers them.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return []
    try:
        tree = ast.parse(text, filename=str(path))
    except SyntaxError:
        # Reporting parse errors is not this tool's job; stay quiet.
        return []

    source_lines = text.splitlines()
    skip_file, directives = _line_directives(source_lines)
    if skip_file:
        return []

    shown_path = str(path)
    kept: list[Finding] = []
    emitted: set[tuple] = set()
    for call in find_llm_calls(tree, source_lines):
        # Physical lines spanned by the call; a directive on any of them applies.
        first = getattr(call.node, "lineno", call.line)
        last = getattr(call.node, "end_lineno", first) or first
        span = range(first, last + 1)
        for finding in run_rules(call, shown_path):
            if (
                config.enabled(finding.rule)
                and finding.key not in emitted
                and not _suppressed(finding, span, directives)
            ):
                emitted.add(finding.key)
                kept.append(finding)
    return kept
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def analyze_paths(paths: list[str], config: Config) -> list[Finding]:
    """Analyze every Python file under ``paths`` and return sorted findings.

    Files are chosen by ``iter_python_files`` using ``config.exclude``. The
    result is ordered by path, line, column, then rule id.
    """
    collected = [
        finding
        for source_file in iter_python_files(paths, config.exclude)
        for finding in analyze_file(source_file, config)
    ]
    return sorted(collected, key=lambda item: (item.path, item.line, item.col, item.rule))
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Command-line entry point for overllm."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from . import __version__
|
|
10
|
+
from .analyze import analyze_paths
|
|
11
|
+
from .config import load_config
|
|
12
|
+
from .report import render
|
|
13
|
+
from .rules import ALL_RULES
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``overllm`` command."""
    parser = argparse.ArgumentParser(
        prog="overllm",
        description=(
            "Catch the LLM/AI calls you didn't need. Flags LLM API calls "
            "where deterministic code is simpler, cheaper, and more reliable."
        ),
    )
    add = parser.add_argument
    # Positional targets; with none given, the current directory is scanned.
    add("paths", nargs="*", default=["."], help="files or directories to scan (default: .)")
    add("--format", choices=["human", "json", "sarif", "markdown"], default="human")
    add("--select", help="comma-separated rule ids to run (default: all)")
    add("--ignore", help="comma-separated rule ids to skip")
    add("--config", type=Path, help="path to a config file (pyproject.toml or .overllm.toml)")
    add("--exit-zero", action="store_true", help="always exit 0, even when findings exist")
    add("--no-color", action="store_true", help="disable ANSI colors")
    add("--version", action="version", version=f"overllm {__version__}")
    return parser
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _split(value: str | None) -> tuple[str, ...]:
    """Split a comma-separated option value into stripped, non-empty items.

    ``None`` and the empty string both give an empty tuple.
    """
    pieces = value.split(",") if value else []
    return tuple(filter(None, map(str.strip, pieces)))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the overllm command line and return the process exit code.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when nothing was found or ``--exit-zero`` was given, else ``1``.
    """
    args = build_parser().parse_args(argv)
    paths = args.paths or ["."]

    config = load_config(explicit=args.config)
    select = _split(args.select)
    ignore = _split(args.ignore)

    # A mistyped rule id used to vanish without a trace (and an all-unknown
    # --select quietly ran every rule). Say so on stderr; stdout and the exit
    # code are left untouched so existing callers keep working.
    unknown = sorted({rule for rule in (*select, *ignore) if rule not in ALL_RULES})
    if unknown:
        print(f"overllm: warning: unknown rule id(s): {', '.join(unknown)}", file=sys.stderr)

    if select:
        # Keep only known ids; with none left, fall back to running all rules.
        config.select = tuple(r for r in select if r in ALL_RULES) or ALL_RULES
    if ignore:
        # Command-line ignores add to, rather than replace, the configured ones.
        config.ignore = tuple(set(config.ignore) | set(ignore))

    findings = analyze_paths(paths, config)

    # Color only when writing to a terminal and not explicitly disabled.
    use_color = sys.stdout.isatty() and not args.no_color
    output = render(findings, args.format, use_color=use_color)
    if output:
        print(output)

    if args.exit_zero:
        return 0
    return 1 if findings else 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())
|