code-analyser 1.2.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_analyser-1.2.0 → code_analyser-1.4.0}/PKG-INFO +3 -1
- code_analyser-1.4.0/examples/basic_usage.md +43 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/pyproject.toml +5 -1
- code_analyser-1.4.0/src/code_analyser/__init__.py +21 -0
- code_analyser-1.4.0/src/code_analyser/embedding.py +25 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/models.py +3 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/pipeline.py +5 -0
- code_analyser-1.4.0/tests/test_embedding.py +25 -0
- code_analyser-1.4.0/tests/test_public_api.py +29 -0
- code_analyser-1.4.0/uv.lock +2424 -0
- code_analyser-1.2.0/src/code_analyser/__init__.py +0 -10
- code_analyser-1.2.0/uv.lock +0 -827
- {code_analyser-1.2.0 → code_analyser-1.4.0}/.dockerignore +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/.env.example +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/.gitignore +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/LICENSE +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/README.md +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/docs/superpowers/plans/2026-05-06-code-analyser-rewrite.md +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/docs/superpowers/specs/2026-05-05-code-analyser-design.md +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/api.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/cli.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/__init__.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/css_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/html_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/javascript_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/notebook_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/python_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/sql_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/typescript_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/detect.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/llm.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/manifest.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/settings.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/__init__.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/api/__init__.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/api/test_api.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/cli/__init__.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/cli/test_cli.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/conftest.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/integration/__init__.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/integration/test_full_pipeline.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/integration/test_pipeline.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/test_invariants.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/__init__.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_css_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_detect.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_html_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_javascript_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_llm.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_models.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_notebook_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_python_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_scaffold.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_sql_.py +0 -0
- {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_typescript_.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-analyser
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: Source code analyser — part of the analyser family
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -22,6 +22,8 @@ Requires-Dist: httpx>=0.27.0; extra == 'dev'
|
|
|
22
22
|
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
|
23
23
|
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
|
24
24
|
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
25
|
+
Provides-Extra: embeddings
|
|
26
|
+
Requires-Dist: lens-embed[text]>=0.1.1; extra == 'embeddings'
|
|
25
27
|
Provides-Extra: llm
|
|
26
28
|
Requires-Dist: anthropic>=0.7.0; extra == 'llm'
|
|
27
29
|
Description-Content-Type: text/markdown
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# code-analyser — basic usage
|
|
2
|
+
|
|
3
|
+
Analyse a source file (`.py`, `.js`, `.ts`, `.html`, `.css`, `.sql`, `.ipynb`, or a `.zip` of them) for structural code signals.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install code-analyser
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## CLI
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Human-readable table
|
|
15
|
+
code-analyser app.py
|
|
16
|
+
|
|
17
|
+
# JSON output
|
|
18
|
+
code-analyser app.py --json
|
|
19
|
+
|
|
20
|
+
# Include LLM quality signals (requires the [llm] extra)
|
|
21
|
+
code-analyser app.py --llm
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Python
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from code_analyser import CodeAnalyser
|
|
28
|
+
|
|
29
|
+
result = CodeAnalyser().analyse("app.py")
|
|
30
|
+
print(result.languages_detected, result.file_count)
|
|
31
|
+
for f in result.files:
|
|
32
|
+
print(f.filename, f.language, f.metrics)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## HTTP
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Start the server (default port 8004)
|
|
39
|
+
code-analyser serve
|
|
40
|
+
|
|
41
|
+
# Analyse a source file via multipart upload
|
|
42
|
+
curl -F file=@app.py http://localhost:8004/analyse
|
|
43
|
+
```
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "code-analyser"
|
|
7
|
-
version = "1.2.0"
|
|
7
|
+
version = "1.4.0"
|
|
8
8
|
description = "Source code analyser — part of the analyser family"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -26,6 +26,9 @@ dependencies = [
|
|
|
26
26
|
|
|
27
27
|
[project.optional-dependencies]
|
|
28
28
|
llm = ["anthropic>=0.7.0"]
|
|
29
|
+
embeddings = [
|
|
30
|
+
"lens-embed[text]>=0.1.1",
|
|
31
|
+
]
|
|
29
32
|
dev = [
|
|
30
33
|
"pytest>=7.4.0",
|
|
31
34
|
"pytest-cov>=4.1.0",
|
|
@@ -37,6 +40,7 @@ dev = [
|
|
|
37
40
|
# the published wheel, which keeps the plain `lens-contract>=0.2.0` PyPI pin.
|
|
38
41
|
[tool.uv.sources]
|
|
39
42
|
lens-contract = { path = "../lens-contract", editable = true }
|
|
43
|
+
lens-embed = { path = "../lens-embed", editable = true }
|
|
40
44
|
|
|
41
45
|
[project.scripts]
|
|
42
46
|
code-analyser = "code_analyser.cli:main"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from importlib.metadata import version as _v
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from .manifest import MANIFEST
|
|
5
|
+
from .models import CodeAnalysis
|
|
6
|
+
from .pipeline import CodeAnalyser
|
|
7
|
+
|
|
8
|
+
__version__ = _v("code-analyser")
|
|
9
|
+
del _v
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def analyse(path: str | Path, *, llm: bool = False) -> CodeAnalysis:
    """Run a fresh :class:`CodeAnalyser` over ``path``.

    Module-level shortcut for the family's canonical call shape: ``path`` is
    coerced to a :class:`~pathlib.Path` and passed, together with ``llm``, to
    ``CodeAnalyser().analyse``, whose result is returned unchanged.
    """
    analyser = CodeAnalyser()
    target = Path(path)
    return analyser.analyse(target, llm=llm)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
__all__ = ["CodeAnalyser", "CodeAnalysis", "analyse", "MANIFEST", "__version__"]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Source-code embedding via the family's shared helper (lens-embed).
|
|
2
|
+
|
|
3
|
+
A single pinned model across the family means this vector is comparable to
|
|
4
|
+
other members' vectors — the basis for cross-artefact and cohort-distinctiveness
|
|
5
|
+
signals computed downstream. Opt-in and degradable: install the [embeddings]
|
|
6
|
+
extra to populate it; without it (or on any failure) this returns None.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def embed_document(text: str) -> list[float] | None:
|
|
13
|
+
"""Pooled, L2-normalised vector, or None if embeddings are off."""
|
|
14
|
+
if not text or not text.strip():
|
|
15
|
+
return None
|
|
16
|
+
try:
|
|
17
|
+
from lens_embed import backend_available, embed_long_text
|
|
18
|
+
except ImportError:
|
|
19
|
+
return None
|
|
20
|
+
if not backend_available("text"):
|
|
21
|
+
return None
|
|
22
|
+
try:
|
|
23
|
+
return embed_long_text(text)
|
|
24
|
+
except Exception:
|
|
25
|
+
return None
|
|
@@ -191,3 +191,6 @@ class CodeAnalysis(BaseModel):
|
|
|
191
191
|
files: list[FileAnalysis]
|
|
192
192
|
cross_file: CrossFileSignals
|
|
193
193
|
llm_signals: TopLevelLLMSignals | None = None
|
|
194
|
+
# Pooled, L2-normalised source vector from lens-embed (pinned all-MiniLM-L6-v2).
|
|
195
|
+
# Comparable across members; None unless [embeddings] installed.
|
|
196
|
+
embedding: list[float] | None = None
|
|
@@ -3,6 +3,7 @@ import zipfile
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
from .detect import detect_language
|
|
6
|
+
from .embedding import embed_document
|
|
6
7
|
from .models import (
|
|
7
8
|
CodeAnalysis, CrossFileSignals, FileAnalysis,
|
|
8
9
|
FileLLMSignals, TopLevelLLMSignals,
|
|
@@ -79,6 +80,9 @@ class CodeAnalyser:
|
|
|
79
80
|
frameworks_detected=sorted(all_frameworks),
|
|
80
81
|
)
|
|
81
82
|
|
|
83
|
+
combined_source = "\n\n".join(_decode(content) for _, content in pairs)
|
|
84
|
+
embedding = embed_document(combined_source)
|
|
85
|
+
|
|
82
86
|
return CodeAnalysis(
|
|
83
87
|
input=path.name,
|
|
84
88
|
file_count=len(files),
|
|
@@ -86,6 +90,7 @@ class CodeAnalyser:
|
|
|
86
90
|
files=files,
|
|
87
91
|
cross_file=cross,
|
|
88
92
|
llm_signals=llm_top,
|
|
93
|
+
embedding=embedding,
|
|
89
94
|
)
|
|
90
95
|
|
|
91
96
|
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Code embedding wiring — field presence + graceful degradation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import importlib.util
|
|
4
|
+
import pytest
|
|
5
|
+
from code_analyser.embedding import embed_document
|
|
6
|
+
from code_analyser.models import CodeAnalysis
|
|
7
|
+
|
|
8
|
+
_TEXT = importlib.util.find_spec("lens_embed") is not None and importlib.util.find_spec("sentence_transformers") is not None
|
|
9
|
+
|
|
10
|
+
def test_field_default_none():
    """``CodeAnalysis`` declares an ``embedding`` field whose default is None."""
    fields = CodeAnalysis.model_fields
    assert "embedding" in fields
    assert fields["embedding"].default is None
|
|
13
|
+
|
|
14
|
+
def test_empty_is_none():
    """Empty and whitespace-only input produce no embedding."""
    for blank in ("", " \n "):
        assert embed_document(blank) is None
|
|
17
|
+
|
|
18
|
+
@pytest.mark.skipif(_TEXT, reason="embeddings extra installed")
def test_none_without_backend():
    """Without the embeddings backend, non-blank source still yields None."""
    snippet = "def add(a, b):\n return a + b\n"
    assert embed_document(snippet) is None
|
|
21
|
+
|
|
22
|
+
@pytest.mark.skipif(not _TEXT, reason="needs [embeddings]")
def test_vector_with_backend():
    """With the backend installed, the result is a 384-element list."""
    source = "def add(a, b):\n return a + b\n\n" * 5
    vector = embed_document(source)
    assert isinstance(vector, list)
    assert len(vector) == 384
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""The canonical public surface every family analyser exposes.
|
|
2
|
+
|
|
3
|
+
See lens-analysers/CONVENTIONS.md: each `-analyser` engine exports its
|
|
4
|
+
`<Name>Analyser` class, the `<Name>Analysis` result model, a module-level
|
|
5
|
+
`analyse()` convenience function, `MANIFEST`, and `__version__`.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import code_analyser
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_canonical_surface_importable():
    """The canonical public names import from the package and look as expected."""
    from code_analyser import MANIFEST, CodeAnalyser, CodeAnalysis, analyse  # noqa: F401

    for entry_point in (analyse, CodeAnalyser):
        assert callable(entry_point)
    assert MANIFEST["name"] == "code-analyser"
    assert isinstance(code_analyser.__version__, str)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_surface_in_dunder_all():
    """``__all__`` lists every canonical public name."""
    expected = ("CodeAnalyser", "CodeAnalysis", "analyse", "MANIFEST", "__version__")
    exported = code_analyser.__all__
    for name in expected:
        assert name in exported
|