code-analyser 1.2.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {code_analyser-1.2.0 → code_analyser-1.4.0}/PKG-INFO +3 -1
  2. code_analyser-1.4.0/examples/basic_usage.md +43 -0
  3. {code_analyser-1.2.0 → code_analyser-1.4.0}/pyproject.toml +5 -1
  4. code_analyser-1.4.0/src/code_analyser/__init__.py +21 -0
  5. code_analyser-1.4.0/src/code_analyser/embedding.py +25 -0
  6. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/models.py +3 -0
  7. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/pipeline.py +5 -0
  8. code_analyser-1.4.0/tests/test_embedding.py +25 -0
  9. code_analyser-1.4.0/tests/test_public_api.py +29 -0
  10. code_analyser-1.4.0/uv.lock +2424 -0
  11. code_analyser-1.2.0/src/code_analyser/__init__.py +0 -10
  12. code_analyser-1.2.0/uv.lock +0 -827
  13. {code_analyser-1.2.0 → code_analyser-1.4.0}/.dockerignore +0 -0
  14. {code_analyser-1.2.0 → code_analyser-1.4.0}/.env.example +0 -0
  15. {code_analyser-1.2.0 → code_analyser-1.4.0}/.gitignore +0 -0
  16. {code_analyser-1.2.0 → code_analyser-1.4.0}/LICENSE +0 -0
  17. {code_analyser-1.2.0 → code_analyser-1.4.0}/README.md +0 -0
  18. {code_analyser-1.2.0 → code_analyser-1.4.0}/docs/superpowers/plans/2026-05-06-code-analyser-rewrite.md +0 -0
  19. {code_analyser-1.2.0 → code_analyser-1.4.0}/docs/superpowers/specs/2026-05-05-code-analyser-design.md +0 -0
  20. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/api.py +0 -0
  21. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/cli.py +0 -0
  22. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/__init__.py +0 -0
  23. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/css_.py +0 -0
  24. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/html_.py +0 -0
  25. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/javascript_.py +0 -0
  26. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/notebook_.py +0 -0
  27. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/python_.py +0 -0
  28. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/sql_.py +0 -0
  29. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/core/typescript_.py +0 -0
  30. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/detect.py +0 -0
  31. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/llm.py +0 -0
  32. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/manifest.py +0 -0
  33. {code_analyser-1.2.0 → code_analyser-1.4.0}/src/code_analyser/settings.py +0 -0
  34. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/__init__.py +0 -0
  35. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/api/__init__.py +0 -0
  36. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/api/test_api.py +0 -0
  37. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/cli/__init__.py +0 -0
  38. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/cli/test_cli.py +0 -0
  39. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/conftest.py +0 -0
  40. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/integration/__init__.py +0 -0
  41. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/integration/test_full_pipeline.py +0 -0
  42. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/integration/test_pipeline.py +0 -0
  43. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/test_invariants.py +0 -0
  44. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/__init__.py +0 -0
  45. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_css_.py +0 -0
  46. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_detect.py +0 -0
  47. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_html_.py +0 -0
  48. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_javascript_.py +0 -0
  49. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_llm.py +0 -0
  50. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_models.py +0 -0
  51. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_notebook_.py +0 -0
  52. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_python_.py +0 -0
  53. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_scaffold.py +0 -0
  54. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_sql_.py +0 -0
  55. {code_analyser-1.2.0 → code_analyser-1.4.0}/tests/unit/test_typescript_.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-analyser
3
- Version: 1.2.0
3
+ Version: 1.4.0
4
4
  Summary: Source code analyser — part of the analyser family
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
@@ -22,6 +22,8 @@ Requires-Dist: httpx>=0.27.0; extra == 'dev'
22
22
  Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
23
23
  Requires-Dist: pytest>=7.4.0; extra == 'dev'
24
24
  Requires-Dist: ruff>=0.4.0; extra == 'dev'
25
+ Provides-Extra: embeddings
26
+ Requires-Dist: lens-embed[text]>=0.1.1; extra == 'embeddings'
25
27
  Provides-Extra: llm
26
28
  Requires-Dist: anthropic>=0.7.0; extra == 'llm'
27
29
  Description-Content-Type: text/markdown
@@ -0,0 +1,43 @@
1
+ # code-analyser — basic usage
2
+
3
+ Analyse a source file (`.py`, `.js`, `.ts`, `.html`, `.css`, `.sql`, `.ipynb`, or a `.zip` of them) for structural code signals.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install code-analyser
9
+ ```
10
+
11
+ ## CLI
12
+
13
+ ```bash
14
+ # Human-readable table
15
+ code-analyser app.py
16
+
17
+ # JSON output
18
+ code-analyser app.py --json
19
+
20
+ # Include LLM quality signals (requires the [llm] extra)
21
+ code-analyser app.py --llm
22
+ ```
23
+
24
+ ## Python
25
+
26
+ ```python
27
+ from code_analyser import CodeAnalyser
28
+
29
+ result = CodeAnalyser().analyse("app.py")
30
+ print(result.languages_detected, result.file_count)
31
+ for f in result.files:
32
+ print(f.filename, f.language, f.metrics)
33
+ ```
34
+
35
+ ## HTTP
36
+
37
+ ```bash
38
+ # Start the server (default port 8004)
39
+ code-analyser serve
40
+
41
+ # Analyse a source file via multipart upload
42
+ curl -F file=@app.py http://localhost:8004/analyse
43
+ ```
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "code-analyser"
7
- version = "1.2.0"
7
+ version = "1.4.0"
8
8
  description = "Source code analyser — part of the analyser family"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -26,6 +26,9 @@ dependencies = [
26
26
 
27
27
  [project.optional-dependencies]
28
28
  llm = ["anthropic>=0.7.0"]
29
+ embeddings = [
30
+ "lens-embed[text]>=0.1.1",
31
+ ]
29
32
  dev = [
30
33
  "pytest>=7.4.0",
31
34
  "pytest-cov>=4.1.0",
@@ -37,6 +40,7 @@ dev = [
37
40
  # the published wheel, which keeps the plain `lens-contract>=0.2.0` PyPI pin.
38
41
  [tool.uv.sources]
39
42
  lens-contract = { path = "../lens-contract", editable = true }
43
+ lens-embed = { path = "../lens-embed", editable = true }
40
44
 
41
45
  [project.scripts]
42
46
  code-analyser = "code_analyser.cli:main"
@@ -0,0 +1,21 @@
1
+ from importlib.metadata import version as _v
2
+ from pathlib import Path
3
+
4
+ from .manifest import MANIFEST
5
+ from .models import CodeAnalysis
6
+ from .pipeline import CodeAnalyser
7
+
8
+ __version__ = _v("code-analyser")
9
+ del _v
10
+
11
+
12
def analyse(path: str | Path, *, llm: bool = False) -> CodeAnalysis:
    """Analyse ``path`` and return a :class:`CodeAnalysis`.

    Module-level convenience for the family's canonical call shape — equivalent
    to ``CodeAnalyser().analyse(Path(path), llm=llm)``.

    Args:
        path: Location of the input to analyse. It is converted to a
            :class:`~pathlib.Path` before being handed to the analyser, so
            plain strings and ``Path`` objects are both accepted.
        llm: Keyword-only flag forwarded unchanged as the ``llm`` argument of
            ``CodeAnalyser.analyse``. Defaults to ``False``.

    Returns:
        The :class:`CodeAnalysis` produced by a freshly constructed
        ``CodeAnalyser`` for this input.
    """
    # A new analyser is built per call; this helper keeps no state of its own.
    return CodeAnalyser().analyse(Path(path), llm=llm)
19
+
20
+
21
+ __all__ = ["CodeAnalyser", "CodeAnalysis", "analyse", "MANIFEST", "__version__"]
@@ -0,0 +1,25 @@
1
+ """Source-code embedding via the family's shared helper (lens-embed).
2
+
3
+ A single pinned model across the family means this vector is comparable to
4
+ other members' vectors — the basis for cross-artefact and cohort-distinctiveness
5
+ signals computed downstream. Opt-in and degradable: install the [embeddings]
6
+ extra to populate it; without it (or on any failure) this returns None.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+
12
+ def embed_document(text: str) -> list[float] | None:
13
+ """Pooled, L2-normalised vector, or None if embeddings are off."""
14
+ if not text or not text.strip():
15
+ return None
16
+ try:
17
+ from lens_embed import backend_available, embed_long_text
18
+ except ImportError:
19
+ return None
20
+ if not backend_available("text"):
21
+ return None
22
+ try:
23
+ return embed_long_text(text)
24
+ except Exception:
25
+ return None
@@ -191,3 +191,6 @@ class CodeAnalysis(BaseModel):
191
191
  files: list[FileAnalysis]
192
192
  cross_file: CrossFileSignals
193
193
  llm_signals: TopLevelLLMSignals | None = None
194
+ # Pooled, L2-normalised source vector from lens-embed (pinned all-MiniLM-L6-v2).
195
+ # Comparable across members; None unless [embeddings] installed.
196
+ embedding: list[float] | None = None
@@ -3,6 +3,7 @@ import zipfile
3
3
  from pathlib import Path
4
4
 
5
5
  from .detect import detect_language
6
+ from .embedding import embed_document
6
7
  from .models import (
7
8
  CodeAnalysis, CrossFileSignals, FileAnalysis,
8
9
  FileLLMSignals, TopLevelLLMSignals,
@@ -79,6 +80,9 @@ class CodeAnalyser:
79
80
  frameworks_detected=sorted(all_frameworks),
80
81
  )
81
82
 
83
+ combined_source = "\n\n".join(_decode(content) for _, content in pairs)
84
+ embedding = embed_document(combined_source)
85
+
82
86
  return CodeAnalysis(
83
87
  input=path.name,
84
88
  file_count=len(files),
@@ -86,6 +90,7 @@ class CodeAnalyser:
86
90
  files=files,
87
91
  cross_file=cross,
88
92
  llm_signals=llm_top,
93
+ embedding=embedding,
89
94
  )
90
95
 
91
96
 
@@ -0,0 +1,25 @@
1
+ """Code embedding wiring — field presence + graceful degradation."""
2
+ from __future__ import annotations
3
+ import importlib.util
4
+ import pytest
5
+ from code_analyser.embedding import embed_document
6
+ from code_analyser.models import CodeAnalysis
7
+
8
+ _TEXT = importlib.util.find_spec("lens_embed") is not None and importlib.util.find_spec("sentence_transformers") is not None
9
+
10
+ def test_field_default_none():
11
+ assert "embedding" in CodeAnalysis.model_fields
12
+ assert CodeAnalysis.model_fields["embedding"].default is None
13
+
14
+ def test_empty_is_none():
15
+ assert embed_document("") is None
16
+ assert embed_document(" \n ") is None
17
+
18
+ @pytest.mark.skipif(_TEXT, reason="embeddings extra installed")
19
+ def test_none_without_backend():
20
+ assert embed_document("def add(a, b):\n return a + b\n") is None
21
+
22
+ @pytest.mark.skipif(not _TEXT, reason="needs [embeddings]")
23
+ def test_vector_with_backend():
24
+ v = embed_document("def add(a, b):\n return a + b\n\n" * 5)
25
+ assert isinstance(v, list) and len(v) == 384
@@ -0,0 +1,29 @@
1
+ """The canonical public surface every family analyser exposes.
2
+
3
+ See lens-analysers/CONVENTIONS.md: each `-analyser` engine exports its
4
+ `<Name>Analyser` class, the `<Name>Analysis` result model, a module-level
5
+ `analyse()` convenience function, `MANIFEST`, and `__version__`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import code_analyser
11
+
12
+
13
+ def test_canonical_surface_importable():
14
+ from code_analyser import ( # noqa: F401
15
+ MANIFEST,
16
+ CodeAnalyser,
17
+ CodeAnalysis,
18
+ analyse,
19
+ )
20
+
21
+ assert callable(analyse)
22
+ assert callable(CodeAnalyser)
23
+ assert MANIFEST["name"] == "code-analyser"
24
+ assert isinstance(code_analyser.__version__, str)
25
+
26
+
27
+ def test_surface_in_dunder_all():
28
+ for name in ("CodeAnalyser", "CodeAnalysis", "analyse", "MANIFEST", "__version__"):
29
+ assert name in code_analyser.__all__