apidrift 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apidrift-0.0.1/.gitattributes +2 -0
- apidrift-0.0.1/.gitignore +33 -0
- apidrift-0.0.1/.pre-commit-hooks.yaml +19 -0
- apidrift-0.0.1/LICENSE +21 -0
- apidrift-0.0.1/PKG-INFO +269 -0
- apidrift-0.0.1/README.md +238 -0
- apidrift-0.0.1/action.yml +23 -0
- apidrift-0.0.1/examples/ai_generated.py +47 -0
- apidrift-0.0.1/pyproject.toml +71 -0
- apidrift-0.0.1/src/apidrift/__init__.py +18 -0
- apidrift-0.0.1/src/apidrift/__main__.py +8 -0
- apidrift-0.0.1/src/apidrift/cache.py +124 -0
- apidrift-0.0.1/src/apidrift/checks.py +187 -0
- apidrift-0.0.1/src/apidrift/cli.py +175 -0
- apidrift-0.0.1/src/apidrift/introspect.py +251 -0
- apidrift-0.0.1/src/apidrift/py.typed +0 -0
- apidrift-0.0.1/src/apidrift/report.py +150 -0
- apidrift-0.0.1/src/apidrift/resolver.py +344 -0
- apidrift-0.0.1/tests/conftest.py +28 -0
- apidrift-0.0.1/tests/fixtures/legacy_lib.py +61 -0
- apidrift-0.0.1/tests/test_benchmark.py +299 -0
- apidrift-0.0.1/tests/test_cache.py +71 -0
- apidrift-0.0.1/tests/test_checks.py +257 -0
- apidrift-0.0.1/tests/test_cli.py +104 -0
- apidrift-0.0.1/tests/test_report.py +179 -0
- apidrift-0.0.1/tests/test_resolver.py +222 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
*.egg
|
|
9
|
+
|
|
10
|
+
# Virtual envs
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# Tooling caches
|
|
16
|
+
.mypy_cache/
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
.pytest_cache/
|
|
19
|
+
.cache/
|
|
20
|
+
|
|
21
|
+
# apidrift's own resolved-member cache (lands here from M3 onward)
|
|
22
|
+
.apidrift_cache/
|
|
23
|
+
|
|
24
|
+
# Editors / OS
|
|
25
|
+
.vscode/
|
|
26
|
+
.idea/
|
|
27
|
+
.DS_Store
|
|
28
|
+
Thumbs.db
|
|
29
|
+
|
|
30
|
+
# Local working notes / internal design docs — kept out so history is public-ready.
|
|
31
|
+
# (Remove a line below to start versioning that file.)
|
|
32
|
+
HANDOFF.md
|
|
33
|
+
Brief.md
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# pre-commit hook definition. Add apidrift to your .pre-commit-config.yaml:
|
|
2
|
+
#
|
|
3
|
+
# repos:
|
|
4
|
+
# - repo: https://github.com/R1ch1k/apidrift
|
|
5
|
+
# rev: v0.0.1
|
|
6
|
+
# hooks:
|
|
7
|
+
# - id: apidrift
|
|
8
|
+
#
|
|
9
|
+
# NOTE: language is `system`, not `python`. apidrift checks calls against the versions
|
|
10
|
+
# of your dependencies that are *actually installed*, so it must run in your project's
|
|
11
|
+
# environment (where those packages live), not in an isolated hook venv that wouldn't
|
|
12
|
+
# have them. Install apidrift into that environment: `pip install apidrift`.
|
|
13
|
+
- id: apidrift
|
|
14
|
+
name: apidrift
|
|
15
|
+
description: Flag API calls that don't exist in the installed dependency version.
|
|
16
|
+
entry: apidrift
|
|
17
|
+
language: system
|
|
18
|
+
types: [python]
|
|
19
|
+
require_serial: true
|
apidrift-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Richik Mandal
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
apidrift-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: apidrift
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: CI guard that flags API calls missing from the dependency version actually installed.
|
|
5
|
+
Project-URL: Homepage, https://github.com/R1ch1k/apidrift
|
|
6
|
+
Project-URL: Repository, https://github.com/R1ch1k/apidrift
|
|
7
|
+
Project-URL: Issues, https://github.com/R1ch1k/apidrift/issues
|
|
8
|
+
Author-email: Richik Mandal <richikmandal18@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: api,ast,ci,linter,llm,static-analysis,version-drift
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Provides-Extra: bench
|
|
24
|
+
Requires-Dist: pandas==2.3.3; extra == 'bench'
|
|
25
|
+
Requires-Dist: scikit-learn==1.8.0; extra == 'bench'
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# apidrift
|
|
33
|
+
|
|
34
|
+
> A CI guard that flags API calls which don't exist in the dependency version **actually installed right now** — the way LLM-generated code breaks.
|
|
35
|
+
|
|
36
|
+
LLM-generated Python confidently calls functions, methods, and keyword arguments that
|
|
37
|
+
were valid in some *older* version of a library and are gone in the one you have pinned:
|
|
38
|
+
hallucinated names, renamed functions, parameters removed in a later major version. A
|
|
39
|
+
type-checker reads stubs and can miss this; apidrift checks **existence and
|
|
40
|
+
version-validity against the live installed package**. Deterministic, zero network, $0.
|
|
41
|
+
|
|
42
|
+
<!-- DEMO GIF: replace with examples/demo.gif once recorded -->
|
|
43
|
+
|
|
44
|
+
```text
|
|
45
|
+
$ apidrift examples/ai_generated.py
|
|
46
|
+
examples/ai_generated.py:17 ERROR pandas.read_exel not found in pandas 2.3.3
|
|
47
|
+
└─ did you mean: pandas.read_excel?
|
|
48
|
+
examples/ai_generated.py:20 ERROR pandas.concatenate not found in pandas 2.3.3
|
|
49
|
+
└─ did you mean: pandas.concat?
|
|
50
|
+
examples/ai_generated.py:23 ERROR pandas.TimeGrouper not found in pandas 2.3.3
|
|
51
|
+
└─ did you mean: pandas.Grouper?
|
|
52
|
+
examples/ai_generated.py:37 ERROR pandas.read_csv() unexpected keyword 'mangle_dupe_cols'
|
|
53
|
+
|
|
54
|
+
4 problems · checked against your installed versions
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Four findings, four distinct drift mechanisms — a typo, a cross-library confusion
|
|
58
|
+
(`concatenate` is numpy; pandas spells it `concat`), a symbol removed in a major
|
|
59
|
+
version, and a keyword argument (`mangle_dupe_cols`) removed in pandas 2.0. That last
|
|
60
|
+
line is the point: apidrift checks your code against *your* environment, not a stub set.
|
|
61
|
+
|
|
62
|
+
## The wedge vs pyright / mypy
|
|
63
|
+
|
|
64
|
+
**pyright and mypy check types against stubs; apidrift checks existence and
|
|
65
|
+
version-validity against your actually-installed package.** An LLM trained on pandas 1.x
|
|
66
|
+
that writes a removed keyword passes a stale-stub type check but breaks at runtime —
|
|
67
|
+
apidrift catches that; type-checkers don't. Package-*name* hallucination is already owned
|
|
68
|
+
by other tools; apidrift owns the **signature / parameter-level** slice for your pinned
|
|
69
|
+
versions.
|
|
70
|
+
|
|
71
|
+
## Install
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install apidrift # PyPI release pending — see "from source" below
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Until the first PyPI release, install from source (works once the repo is public):
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install git+https://github.com/R1ch1k/apidrift
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Important:** apidrift checks calls against the versions of your dependencies that are
|
|
84
|
+
*actually installed*, so install it into — and run it from — the same environment as your
|
|
85
|
+
project's dependencies. Run in a clean env with nothing installed and apidrift will (by
|
|
86
|
+
design) find nothing to check.
|
|
87
|
+
|
|
88
|
+
## Quickstart
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
apidrift path/ file.py # check files, directories, or globs
|
|
92
|
+
apidrift src # walk a package recursively
|
|
93
|
+
apidrift examples/ai_generated.py --verbose # also show what was skipped, and why
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Exit codes gate CI directly: `1` if any **error** (missing symbol or invalid keyword),
|
|
97
|
+
`0` if clean — or if only deprecation **notices** remain (they don't fail the build).
|
|
98
|
+
|
|
99
|
+
## The three checks
|
|
100
|
+
|
|
101
|
+
- **Check A — symbol existence.** Walks the resolved dotted path against the installed
|
|
102
|
+
package and flags a segment genuinely absent from a cleanly introspectable parent, with
|
|
103
|
+
a `difflib` "did you mean".
|
|
104
|
+
- **Check B — keyword-arg validity.** Flags a keyword the resolved callable's signature
|
|
105
|
+
does not accept. Stays silent if the signature declares `**kwargs` (any keyword could be
|
|
106
|
+
valid).
|
|
107
|
+
- **Check C — PEP 702 deprecation.** Flags a symbol carrying a `__deprecated__` marker
|
|
108
|
+
(set by `warnings.deprecated` / `typing_extensions.deprecated`). This is a **NOTICE**,
|
|
109
|
+
not an error — deprecated code still works, so it **does not gate CI** (exit 0).
|
|
110
|
+
|
|
111
|
+
## JSON output
|
|
112
|
+
|
|
113
|
+
`--json` emits a stable document instead of text (identical exit codes):
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
apidrift src --json
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"schema_version": 1,
|
|
122
|
+
"findings": [
|
|
123
|
+
{
|
|
124
|
+
"path": "examples/ai_generated.py",
|
|
125
|
+
"line": 17,
|
|
126
|
+
"column": 8,
|
|
127
|
+
"severity": "ERROR",
|
|
128
|
+
"check": "existence",
|
|
129
|
+
"symbol": "pandas.read_exel",
|
|
130
|
+
"message": "pandas.read_exel not found in pandas 2.3.3",
|
|
131
|
+
"suggestion": "pandas.read_excel",
|
|
132
|
+
"package": "pandas",
|
|
133
|
+
"version": "2.3.3"
|
|
134
|
+
}
|
|
135
|
+
],
|
|
136
|
+
"summary": { "errors": 1, "notices": 0, "total": 1, "exit_code": 1 }
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
| field | meaning |
|
|
141
|
+
| --- | --- |
|
|
142
|
+
| `path` | source file, always forward-slashed (OS-independent; the text report keeps native separators) |
|
|
143
|
+
| `severity` | `ERROR` (gates CI) or `NOTICE` (deprecation; does not gate) |
|
|
144
|
+
| `check` | `existence`, `keyword`, or `deprecation` |
|
|
145
|
+
| `symbol` | the fully-qualified target the finding is about |
|
|
146
|
+
| `message` | the rendered human headline |
|
|
147
|
+
| `suggestion` | the "did you mean" replacement, or `null` |
|
|
148
|
+
| `package` / `version` | the resolved package and the installed version checked against |
|
|
149
|
+
| `summary.exit_code` | the process exit code — always matches the text run |
|
|
150
|
+
|
|
151
|
+
## pre-commit
|
|
152
|
+
|
|
153
|
+
```yaml
|
|
154
|
+
# .pre-commit-config.yaml
|
|
155
|
+
repos:
|
|
156
|
+
- repo: https://github.com/R1ch1k/apidrift
|
|
157
|
+
rev: v0.0.1
|
|
158
|
+
hooks:
|
|
159
|
+
- id: apidrift
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
The hook uses `language: system` deliberately: apidrift must run in your project's
|
|
163
|
+
environment (where your dependencies live), so install it there — `pip install apidrift`.
|
|
164
|
+
|
|
165
|
+
## GitHub Action
|
|
166
|
+
|
|
167
|
+
Install your dependencies first, then run apidrift against the repo:
|
|
168
|
+
|
|
169
|
+
```yaml
|
|
170
|
+
# .github/workflows/apidrift.yml
|
|
171
|
+
name: apidrift
|
|
172
|
+
on: [push, pull_request]
|
|
173
|
+
jobs:
|
|
174
|
+
apidrift:
|
|
175
|
+
runs-on: ubuntu-latest
|
|
176
|
+
steps:
|
|
177
|
+
- uses: actions/checkout@v4
|
|
178
|
+
- uses: actions/setup-python@v5
|
|
179
|
+
with:
|
|
180
|
+
python-version: "3.x"
|
|
181
|
+
- run: pip install -e . # install YOUR deps so apidrift can introspect them
|
|
182
|
+
- uses: R1ch1k/apidrift@v0.0.1
|
|
183
|
+
with:
|
|
184
|
+
paths: "src tests"
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
The step fails the build on errors and passes on a clean (or notices-only) run.
|
|
188
|
+
|
|
189
|
+
## What apidrift deliberately does *not* flag
|
|
190
|
+
|
|
191
|
+
Precision is the whole adoption story, so silence is a feature — and a linter that
|
|
192
|
+
documents its silence is one you can trust to gate CI. apidrift stays quiet, by design,
|
|
193
|
+
on:
|
|
194
|
+
|
|
195
|
+
- **`openai.ChatCompletion.create(...)`** — openai 1.x keeps `ChatCompletion` as an
|
|
196
|
+
`APIRemovedInV1Proxy` whose `__getattr__` absorbs *any* attribute (so `.create`
|
|
197
|
+
"exists") and raises only when the call is *executed*. The symbol genuinely exists, so
|
|
198
|
+
an existence check correctly says nothing. That is a call-time/deprecation failure, a
|
|
199
|
+
different and harder problem than existence — not a missing name.
|
|
200
|
+
- **A non-deprecated subclass of a deprecated class** — the deprecation check reads the
|
|
201
|
+
symbol's *own* `__dict__`, never inherited attributes. A `__deprecated__` marker on a
|
|
202
|
+
base class does not make a live subclass that merely inherits from it look deprecated.
|
|
203
|
+
- **Anything unverifiable** — if a package fails to import, exposes a C-extension callable
|
|
204
|
+
with no introspectable signature, is a dynamic `__getattr__` module, or the called
|
|
205
|
+
function declares `**kwargs`, apidrift emits nothing. It never guesses.
|
|
206
|
+
|
|
207
|
+
apidrift would always rather miss a real drift than raise a false alarm on code that is
|
|
208
|
+
genuinely fine. A linter that cries wolf gets uninstalled.
|
|
209
|
+
|
|
210
|
+
## Scope (v0)
|
|
211
|
+
|
|
212
|
+
**In:** Python; calls whose receiver traces to an imported module or imported name
|
|
213
|
+
(`mod.func(...)`, `mod.sub.Class(...)`, `from mod import func; func(...)`); checked
|
|
214
|
+
against third-party packages installed in the current environment.
|
|
215
|
+
|
|
216
|
+
**Out (honest limitations, not bugs):**
|
|
217
|
+
|
|
218
|
+
- Method calls on inferred-type receivers (`df.merge(...)` where `df` is a local) — that
|
|
219
|
+
needs type inference, which is mypy's job.
|
|
220
|
+
- Cross-file flow analysis; autofix (suggestions only).
|
|
221
|
+
- C-extension callables with no introspectable signature → unverifiable, never flagged.
|
|
222
|
+
- stdlib (rarely LLM-hallucinated) and relative / first-party imports (no "installed
|
|
223
|
+
version" to check against).
|
|
224
|
+
|
|
225
|
+
## How it works
|
|
226
|
+
|
|
227
|
+
1. **Resolve** (`resolver.py`) — AST → import table → fully-qualified call targets rooted
|
|
228
|
+
at an installed third-party package. Sound-by-default: reassigned/ambiguous/shadowed
|
|
229
|
+
names are dropped, wildcard-origin bare names are refused, method-on-local receivers
|
|
230
|
+
are skipped.
|
|
231
|
+
2. **Introspect** (`introspect.py`) — import the package and walk the path into a
|
|
232
|
+
serializable record (existence / signature / `__deprecated__`). Every failure mode
|
|
233
|
+
degrades to "unverifiable".
|
|
234
|
+
3. **Check** (`checks.py`) — pure logic over the record: existence, keyword validity,
|
|
235
|
+
deprecation.
|
|
236
|
+
4. **Cache** (`cache.py`) — records are cached to disk keyed by `(package, version)`, so
|
|
237
|
+
repeat runs skip the import entirely. A version bump misses and re-introspects. Escape
|
|
238
|
+
hatches: `--no-cache`, `--clear-cache`.
|
|
239
|
+
|
|
240
|
+
## Design tenets
|
|
241
|
+
|
|
242
|
+
1. **Silence beats a false alarm.** If a call can't be resolved with confidence, apidrift
|
|
243
|
+
emits nothing. Sound-by-default — precision over recall.
|
|
244
|
+
2. **Deterministic, $0.** No model or network calls anywhere in detection. Pure AST +
|
|
245
|
+
introspection.
|
|
246
|
+
3. **DX is the moat.** Zero-config, CI-native, near-zero dependencies. "Did-you-mean"
|
|
247
|
+
suggestions are first-class.
|
|
248
|
+
|
|
249
|
+
## Validation
|
|
250
|
+
|
|
251
|
+
apidrift is tested with hand-written good/bad fixture pairs per seed library (openai,
|
|
252
|
+
pandas, requests, …) and a soundness-weighted suite that asserts it stays *silent* on the
|
|
253
|
+
ambiguous, `**kwargs`, C-extension, wildcard-import, dynamic-`__getattr__`, and
|
|
254
|
+
deprecation-proxy cases.
|
|
255
|
+
|
|
256
|
+
On top of that it is **validated against 30 real version-drift cases curated from the
|
|
257
|
+
pandas and scikit-learn release notes** (removed/renamed symbols and removed keyword
|
|
258
|
+
arguments), covering the same drift classes catalogued by benchmarks such as GitChameleon,
|
|
259
|
+
VersiCode, and CodeUpdateArena. Each case asserts *both* directions: the drifted call is
|
|
260
|
+
flagged, **and** its modern replacement stays silent. The set is pinned to the versions it
|
|
261
|
+
was verified against (`pandas==2.3.3`, `scikit-learn==1.8.0`); install them with
|
|
262
|
+
`pip install -e .[bench]` and run `pytest tests/test_benchmark.py` to reproduce the count.
|
|
263
|
+
Candidates that could not be flagged soundly (e.g. pydantic's `**kwargs`-accepting `Field`)
|
|
264
|
+
were dropped rather than counted — the number is the empirically passing total, not an
|
|
265
|
+
aspiration.
|
|
266
|
+
|
|
267
|
+
## License
|
|
268
|
+
|
|
269
|
+
MIT
|
apidrift-0.0.1/README.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# apidrift
|
|
2
|
+
|
|
3
|
+
> A CI guard that flags API calls which don't exist in the dependency version **actually installed right now** — the way LLM-generated code breaks.
|
|
4
|
+
|
|
5
|
+
LLM-generated Python confidently calls functions, methods, and keyword arguments that
|
|
6
|
+
were valid in some *older* version of a library and are gone in the one you have pinned:
|
|
7
|
+
hallucinated names, renamed functions, parameters removed in a later major version. A
|
|
8
|
+
type-checker reads stubs and can miss this; apidrift checks **existence and
|
|
9
|
+
version-validity against the live installed package**. Deterministic, zero network, $0.
|
|
10
|
+
|
|
11
|
+
<!-- DEMO GIF: replace with examples/demo.gif once recorded -->
|
|
12
|
+
|
|
13
|
+
```text
|
|
14
|
+
$ apidrift examples/ai_generated.py
|
|
15
|
+
examples/ai_generated.py:17 ERROR pandas.read_exel not found in pandas 2.3.3
|
|
16
|
+
└─ did you mean: pandas.read_excel?
|
|
17
|
+
examples/ai_generated.py:20 ERROR pandas.concatenate not found in pandas 2.3.3
|
|
18
|
+
└─ did you mean: pandas.concat?
|
|
19
|
+
examples/ai_generated.py:23 ERROR pandas.TimeGrouper not found in pandas 2.3.3
|
|
20
|
+
└─ did you mean: pandas.Grouper?
|
|
21
|
+
examples/ai_generated.py:37 ERROR pandas.read_csv() unexpected keyword 'mangle_dupe_cols'
|
|
22
|
+
|
|
23
|
+
4 problems · checked against your installed versions
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Four findings, four distinct drift mechanisms — a typo, a cross-library confusion
|
|
27
|
+
(`concatenate` is numpy; pandas spells it `concat`), a symbol removed in a major
|
|
28
|
+
version, and a keyword argument (`mangle_dupe_cols`) removed in pandas 2.0. That last
|
|
29
|
+
line is the point: apidrift checks your code against *your* environment, not a stub set.
|
|
30
|
+
|
|
31
|
+
## The wedge vs pyright / mypy
|
|
32
|
+
|
|
33
|
+
**pyright and mypy check types against stubs; apidrift checks existence and
|
|
34
|
+
version-validity against your actually-installed package.** An LLM trained on pandas 1.x
|
|
35
|
+
that writes a removed keyword passes a stale-stub type check but breaks at runtime —
|
|
36
|
+
apidrift catches that; type-checkers don't. Package-*name* hallucination is already owned
|
|
37
|
+
by other tools; apidrift owns the **signature / parameter-level** slice for your pinned
|
|
38
|
+
versions.
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install apidrift # PyPI release pending — see "from source" below
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Until the first PyPI release, install from source (works once the repo is public):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install git+https://github.com/R1ch1k/apidrift
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**Important:** apidrift checks calls against the versions of your dependencies that are
|
|
53
|
+
*actually installed*, so install it into — and run it from — the same environment as your
|
|
54
|
+
project's dependencies. Run in a clean env with nothing installed and apidrift will (by
|
|
55
|
+
design) find nothing to check.
|
|
56
|
+
|
|
57
|
+
## Quickstart
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
apidrift path/ file.py # check files, directories, or globs
|
|
61
|
+
apidrift src # walk a package recursively
|
|
62
|
+
apidrift examples/ai_generated.py --verbose # also show what was skipped, and why
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Exit codes gate CI directly: `1` if any **error** (missing symbol or invalid keyword),
|
|
66
|
+
`0` if clean — or if only deprecation **notices** remain (they don't fail the build).
|
|
67
|
+
|
|
68
|
+
## The three checks
|
|
69
|
+
|
|
70
|
+
- **Check A — symbol existence.** Walks the resolved dotted path against the installed
|
|
71
|
+
package and flags a segment genuinely absent from a cleanly introspectable parent, with
|
|
72
|
+
a `difflib` "did you mean".
|
|
73
|
+
- **Check B — keyword-arg validity.** Flags a keyword the resolved callable's signature
|
|
74
|
+
does not accept. Stays silent if the signature declares `**kwargs` (any keyword could be
|
|
75
|
+
valid).
|
|
76
|
+
- **Check C — PEP 702 deprecation.** Flags a symbol carrying a `__deprecated__` marker
|
|
77
|
+
(set by `warnings.deprecated` / `typing_extensions.deprecated`). This is a **NOTICE**,
|
|
78
|
+
not an error — deprecated code still works, so it **does not gate CI** (exit 0).
|
|
79
|
+
|
|
80
|
+
## JSON output
|
|
81
|
+
|
|
82
|
+
`--json` emits a stable document instead of text (identical exit codes):
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
apidrift src --json
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"schema_version": 1,
|
|
91
|
+
"findings": [
|
|
92
|
+
{
|
|
93
|
+
"path": "examples/ai_generated.py",
|
|
94
|
+
"line": 17,
|
|
95
|
+
"column": 8,
|
|
96
|
+
"severity": "ERROR",
|
|
97
|
+
"check": "existence",
|
|
98
|
+
"symbol": "pandas.read_exel",
|
|
99
|
+
"message": "pandas.read_exel not found in pandas 2.3.3",
|
|
100
|
+
"suggestion": "pandas.read_excel",
|
|
101
|
+
"package": "pandas",
|
|
102
|
+
"version": "2.3.3"
|
|
103
|
+
}
|
|
104
|
+
],
|
|
105
|
+
"summary": { "errors": 1, "notices": 0, "total": 1, "exit_code": 1 }
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
| field | meaning |
|
|
110
|
+
| --- | --- |
|
|
111
|
+
| `path` | source file, always forward-slashed (OS-independent; the text report keeps native separators) |
|
|
112
|
+
| `severity` | `ERROR` (gates CI) or `NOTICE` (deprecation; does not gate) |
|
|
113
|
+
| `check` | `existence`, `keyword`, or `deprecation` |
|
|
114
|
+
| `symbol` | the fully-qualified target the finding is about |
|
|
115
|
+
| `message` | the rendered human headline |
|
|
116
|
+
| `suggestion` | the "did you mean" replacement, or `null` |
|
|
117
|
+
| `package` / `version` | the resolved package and the installed version checked against |
|
|
118
|
+
| `summary.exit_code` | the process exit code — always matches the text run |
|
|
119
|
+
|
|
120
|
+
## pre-commit
|
|
121
|
+
|
|
122
|
+
```yaml
|
|
123
|
+
# .pre-commit-config.yaml
|
|
124
|
+
repos:
|
|
125
|
+
- repo: https://github.com/R1ch1k/apidrift
|
|
126
|
+
rev: v0.0.1
|
|
127
|
+
hooks:
|
|
128
|
+
- id: apidrift
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
The hook uses `language: system` deliberately: apidrift must run in your project's
|
|
132
|
+
environment (where your dependencies live), so install it there — `pip install apidrift`.
|
|
133
|
+
|
|
134
|
+
## GitHub Action
|
|
135
|
+
|
|
136
|
+
Install your dependencies first, then run apidrift against the repo:
|
|
137
|
+
|
|
138
|
+
```yaml
|
|
139
|
+
# .github/workflows/apidrift.yml
|
|
140
|
+
name: apidrift
|
|
141
|
+
on: [push, pull_request]
|
|
142
|
+
jobs:
|
|
143
|
+
apidrift:
|
|
144
|
+
runs-on: ubuntu-latest
|
|
145
|
+
steps:
|
|
146
|
+
- uses: actions/checkout@v4
|
|
147
|
+
- uses: actions/setup-python@v5
|
|
148
|
+
with:
|
|
149
|
+
python-version: "3.x"
|
|
150
|
+
- run: pip install -e . # install YOUR deps so apidrift can introspect them
|
|
151
|
+
- uses: R1ch1k/apidrift@v0.0.1
|
|
152
|
+
with:
|
|
153
|
+
paths: "src tests"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The step fails the build on errors and passes on a clean (or notices-only) run.
|
|
157
|
+
|
|
158
|
+
## What apidrift deliberately does *not* flag
|
|
159
|
+
|
|
160
|
+
Precision is the whole adoption story, so silence is a feature — and a linter that
|
|
161
|
+
documents its silence is one you can trust to gate CI. apidrift stays quiet, by design,
|
|
162
|
+
on:
|
|
163
|
+
|
|
164
|
+
- **`openai.ChatCompletion.create(...)`** — openai 1.x keeps `ChatCompletion` as an
|
|
165
|
+
`APIRemovedInV1Proxy` whose `__getattr__` absorbs *any* attribute (so `.create`
|
|
166
|
+
"exists") and raises only when the call is *executed*. The symbol genuinely exists, so
|
|
167
|
+
an existence check correctly says nothing. That is a call-time/deprecation failure, a
|
|
168
|
+
different and harder problem than existence — not a missing name.
|
|
169
|
+
- **A non-deprecated subclass of a deprecated class** — the deprecation check reads the
|
|
170
|
+
symbol's *own* `__dict__`, never inherited attributes. A `__deprecated__` marker on a
|
|
171
|
+
base class does not make a live subclass that merely inherits from it look deprecated.
|
|
172
|
+
- **Anything unverifiable** — if a package fails to import, exposes a C-extension callable
|
|
173
|
+
with no introspectable signature, is a dynamic `__getattr__` module, or the called
|
|
174
|
+
function declares `**kwargs`, apidrift emits nothing. It never guesses.
|
|
175
|
+
|
|
176
|
+
apidrift would always rather miss a real drift than raise a false alarm on code that is
|
|
177
|
+
genuinely fine. A linter that cries wolf gets uninstalled.
|
|
178
|
+
|
|
179
|
+
## Scope (v0)
|
|
180
|
+
|
|
181
|
+
**In:** Python; calls whose receiver traces to an imported module or imported name
|
|
182
|
+
(`mod.func(...)`, `mod.sub.Class(...)`, `from mod import func; func(...)`); checked
|
|
183
|
+
against third-party packages installed in the current environment.
|
|
184
|
+
|
|
185
|
+
**Out (honest limitations, not bugs):**
|
|
186
|
+
|
|
187
|
+
- Method calls on inferred-type receivers (`df.merge(...)` where `df` is a local) — that
|
|
188
|
+
needs type inference, which is mypy's job.
|
|
189
|
+
- Cross-file flow analysis; autofix (suggestions only).
|
|
190
|
+
- C-extension callables with no introspectable signature → unverifiable, never flagged.
|
|
191
|
+
- stdlib (rarely LLM-hallucinated) and relative / first-party imports (no "installed
|
|
192
|
+
version" to check against).
|
|
193
|
+
|
|
194
|
+
## How it works
|
|
195
|
+
|
|
196
|
+
1. **Resolve** (`resolver.py`) — AST → import table → fully-qualified call targets rooted
|
|
197
|
+
at an installed third-party package. Sound-by-default: reassigned/ambiguous/shadowed
|
|
198
|
+
names are dropped, wildcard-origin bare names are refused, method-on-local receivers
|
|
199
|
+
are skipped.
|
|
200
|
+
2. **Introspect** (`introspect.py`) — import the package and walk the path into a
|
|
201
|
+
serializable record (existence / signature / `__deprecated__`). Every failure mode
|
|
202
|
+
degrades to "unverifiable".
|
|
203
|
+
3. **Check** (`checks.py`) — pure logic over the record: existence, keyword validity,
|
|
204
|
+
deprecation.
|
|
205
|
+
4. **Cache** (`cache.py`) — records are cached to disk keyed by `(package, version)`, so
|
|
206
|
+
repeat runs skip the import entirely. A version bump misses and re-introspects. Escape
|
|
207
|
+
hatches: `--no-cache`, `--clear-cache`.
|
|
208
|
+
|
|
209
|
+
## Design tenets
|
|
210
|
+
|
|
211
|
+
1. **Silence beats a false alarm.** If a call can't be resolved with confidence, apidrift
|
|
212
|
+
emits nothing. Sound-by-default — precision over recall.
|
|
213
|
+
2. **Deterministic, $0.** No model or network calls anywhere in detection. Pure AST +
|
|
214
|
+
introspection.
|
|
215
|
+
3. **DX is the moat.** Zero-config, CI-native, near-zero dependencies. "Did-you-mean"
|
|
216
|
+
suggestions are first-class.
|
|
217
|
+
|
|
218
|
+
## Validation
|
|
219
|
+
|
|
220
|
+
apidrift is tested with hand-written good/bad fixture pairs per seed library (openai,
|
|
221
|
+
pandas, requests, …) and a soundness-weighted suite that asserts it stays *silent* on the
|
|
222
|
+
ambiguous, `**kwargs`, C-extension, wildcard-import, dynamic-`__getattr__`, and
|
|
223
|
+
deprecation-proxy cases.
|
|
224
|
+
|
|
225
|
+
On top of that it is **validated against 30 real version-drift cases curated from the
|
|
226
|
+
pandas and scikit-learn release notes** (removed/renamed symbols and removed keyword
|
|
227
|
+
arguments), covering the same drift classes catalogued by benchmarks such as GitChameleon,
|
|
228
|
+
VersiCode, and CodeUpdateArena. Each case asserts *both* directions: the drifted call is
|
|
229
|
+
flagged, **and** its modern replacement stays silent. The set is pinned to the versions it
|
|
230
|
+
was verified against (`pandas==2.3.3`, `scikit-learn==1.8.0`); install them with
|
|
231
|
+
`pip install -e .[bench]` and run `pytest tests/test_benchmark.py` to reproduce the count.
|
|
232
|
+
Candidates that could not be flagged soundly (e.g. pydantic's `**kwargs`-accepting `Field`)
|
|
233
|
+
were dropped rather than counted — the number is the empirically passing total, not an
|
|
234
|
+
aspiration.
|
|
235
|
+
|
|
236
|
+
## License
|
|
237
|
+
|
|
238
|
+
MIT
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: "apidrift"
|
|
2
|
+
description: "Flag API calls that don't exist in the installed dependency version."
|
|
3
|
+
branding:
|
|
4
|
+
icon: "alert-triangle"
|
|
5
|
+
color: "purple"
|
|
6
|
+
|
|
7
|
+
inputs:
|
|
8
|
+
paths:
|
|
9
|
+
description: "Files, directories, or glob patterns to check."
|
|
10
|
+
required: false
|
|
11
|
+
default: "."
|
|
12
|
+
|
|
13
|
+
runs:
|
|
14
|
+
using: "composite"
|
|
15
|
+
steps:
|
|
16
|
+
# apidrift introspects the packages installed in *this* environment, so your own
|
|
17
|
+
# dependencies must already be installed (do that in an earlier workflow step).
|
|
18
|
+
- name: Install apidrift
|
|
19
|
+
shell: bash
|
|
20
|
+
run: pip install "${{ github.action_path }}"
|
|
21
|
+
- name: Run apidrift
|
|
22
|
+
shell: bash
|
|
23
|
+
run: apidrift ${{ inputs.paths }}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Realistic LLM-generated code carrying version-drift API mistakes.
|
|
2
|
+
|
|
3
|
+
apidrift's anchor fixture and README demo. Above the divider: genuine symbol-existence
|
|
4
|
+
mistakes Check A flags against the installed packages — three distinct mechanisms an
|
|
5
|
+
LLM trained on an older API would emit. Below the divider: cases apidrift must stay
|
|
6
|
+
SILENT on. Soundness is the product, so the fixture tests it like a feature.
|
|
7
|
+
|
|
8
|
+
NOTE: not runnable; it is parsed and introspected, not executed.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import openai
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import requests
|
|
14
|
+
from pandas import read_csv
|
|
15
|
+
|
|
16
|
+
# (1) Typo / hallucination — there is no `pandas.read_exel`.
|
|
17
|
+
frame = pd.read_exel("sheet.xlsx")
|
|
18
|
+
|
|
19
|
+
# (2) Cross-library confusion — `concatenate` is numpy; pandas spells it `concat`.
|
|
20
|
+
combined = pd.concatenate([frame, frame])
|
|
21
|
+
|
|
22
|
+
# (3) Version removal — `TimeGrouper` was removed in pandas 1.0 (use `Grouper`).
|
|
23
|
+
grouper = pd.TimeGrouper("M")
|
|
24
|
+
|
|
25
|
+
# ----------------------------------------------------------------------------- #
|
|
26
|
+
# Everything below must stay SILENT under Check A (symbol existence).
|
|
27
|
+
# ----------------------------------------------------------------------------- #
|
|
28
|
+
|
|
29
|
+
# Deprecation shim, NOT a missing symbol: openai 1.x keeps `ChatCompletion` as a
|
|
30
|
+
# proxy that exists on attribute access and only raises when *called*. Existence
|
|
31
|
+
# introspection sees it as present, so Check A correctly says nothing — that drift
|
|
32
|
+
# is a call-time/deprecation concern, a different (harder) check than existence.
|
|
33
|
+
response = openai.ChatCompletion.create(model="gpt-4", messages=[])
|
|
34
|
+
|
|
35
|
+
# `read_csv` exists; the removed `mangle_dupe_cols` keyword is a Check B (kwargs)
|
|
36
|
+
# problem, not an existence one — Check A stays silent here by design.
|
|
37
|
+
table = pd.read_csv("data.csv", mangle_dupe_cols=True)
|
|
38
|
+
|
|
39
|
+
# Valid calls that genuinely exist in the installed versions.
|
|
40
|
+
clean = read_csv("data.csv")
|
|
41
|
+
client = openai.OpenAI()
|
|
42
|
+
|
|
43
|
+
# Method call on an inferred-type receiver -> needs type inference, out of scope.
|
|
44
|
+
merged = clean.merge(frame, on="id")
|
|
45
|
+
|
|
46
|
+
# Var-keyword target (`requests.get(url, **kwargs)`) -> kwargs unverifiable, silent.
|
|
47
|
+
requests.get("https://example.com", timeout=5, verify=False)
|