coderay 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderay-1.1.0/src/coderay.egg-info → coderay-1.2.0}/PKG-INFO +46 -39
- {coderay-1.1.0 → coderay-1.2.0}/README.md +44 -38
- {coderay-1.1.0 → coderay-1.2.0}/pyproject.toml +2 -1
- coderay-1.2.0/src/coderay/__init__.py +1 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/cli/commands.py +47 -4
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/timing.py +7 -2
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/base.py +2 -2
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/local.py +21 -9
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/mlx_backend.py +20 -19
- coderay-1.2.0/src/coderay/graph/README.md +45 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/graph/__init__.py +3 -2
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/graph/builder.py +6 -31
- coderay-1.2.0/src/coderay/graph/code_graph.py +276 -0
- coderay-1.2.0/src/coderay/graph/extractors/__init__.py +5 -0
- coderay-1.2.0/src/coderay/graph/extractors/base.py +248 -0
- coderay-1.2.0/src/coderay/graph/extractors/js_ts/__init__.py +7 -0
- coderay-1.2.0/src/coderay/graph/extractors/js_ts/extractor.py +53 -0
- coderay-1.2.0/src/coderay/graph/extractors/python/__init__.py +7 -0
- coderay-1.2.0/src/coderay/graph/extractors/python/extractor.py +58 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/graph/facts.py +10 -0
- coderay-1.2.0/src/coderay/graph/graph_builder.py +92 -0
- coderay-1.2.0/src/coderay/graph/handlers/__init__.py +1 -0
- coderay-1.2.0/src/coderay/graph/handlers/assignment_binder.py +140 -0
- coderay-1.2.0/src/coderay/graph/handlers/call_emitter.py +47 -0
- coderay-1.2.0/src/coderay/graph/handlers/decorator_emitter.py +74 -0
- coderay-1.2.0/src/coderay/graph/handlers/definition_binder.py +41 -0
- coderay-1.2.0/src/coderay/graph/handlers/definition_emitter.py +68 -0
- coderay-1.2.0/src/coderay/graph/handlers/helpers.py +20 -0
- coderay-1.2.0/src/coderay/graph/handlers/js_ts/__init__.py +1 -0
- coderay-1.2.0/src/coderay/graph/handlers/js_ts/import_binder.py +111 -0
- coderay-1.2.0/src/coderay/graph/handlers/js_ts/import_emitter.py +41 -0
- coderay-1.2.0/src/coderay/graph/handlers/python/__init__.py +1 -0
- coderay-1.2.0/src/coderay/graph/handlers/python/assignment_binder.py +129 -0
- coderay-1.2.0/src/coderay/graph/handlers/python/function_binder.py +55 -0
- coderay-1.2.0/src/coderay/graph/handlers/python/import_binder.py +133 -0
- coderay-1.2.0/src/coderay/graph/handlers/python/import_emitter.py +58 -0
- coderay-1.2.0/src/coderay/graph/handlers/python/with_binder.py +59 -0
- coderay-1.2.0/src/coderay/graph/handlers/typed_annotations.py +77 -0
- coderay-1.2.0/src/coderay/graph/handlers/typed_params.py +139 -0
- coderay-1.2.0/src/coderay/graph/impact.py +249 -0
- coderay-1.2.0/src/coderay/graph/language_plugin.py +63 -0
- coderay-1.2.0/src/coderay/graph/lowering/__init__.py +3 -0
- coderay-1.2.0/src/coderay/graph/lowering/callee_resolver.py +141 -0
- coderay-1.2.0/src/coderay/graph/lowering/callee_strategy.py +25 -0
- coderay-1.2.0/src/coderay/graph/lowering/cst_helpers.py +64 -0
- coderay-1.2.0/src/coderay/graph/lowering/name_bindings.py +229 -0
- coderay-1.1.0/src/coderay/graph/emit.py → coderay-1.2.0/src/coderay/graph/materialise.py +8 -10
- coderay-1.2.0/src/coderay/graph/passes/resolve_bare_phantoms.py +49 -0
- coderay-1.2.0/src/coderay/graph/pipeline.py +35 -0
- coderay-1.2.0/src/coderay/graph/project_index.py +43 -0
- coderay-1.2.0/src/coderay/graph/refs.py +59 -0
- coderay-1.1.0/src/coderay/graph/identifiers.py → coderay-1.2.0/src/coderay/graph/utils.py +6 -3
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/mcp_server/server.py +53 -9
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/parsing/README.md +2 -2
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/parsing/base.py +4 -1
- coderay-1.1.0/src/coderay/graph/_utils.py → coderay-1.2.0/src/coderay/parsing/conventions.py +16 -19
- coderay-1.2.0/src/coderay/parsing/cst_traversal.py +118 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/parsing/languages.py +39 -20
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/pipeline/indexer.py +48 -40
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/skeleton/README.md +3 -2
- coderay-1.2.0/src/coderay/skeleton/extractor.py +402 -0
- coderay-1.2.0/src/coderay/skeleton/path_range.py +39 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/vcs/git.py +0 -4
- {coderay-1.1.0 → coderay-1.2.0/src/coderay.egg-info}/PKG-INFO +46 -39
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay.egg-info/SOURCES.txt +40 -25
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay.egg-info/requires.txt +1 -0
- coderay-1.1.0/src/coderay/__init__.py +0 -1
- coderay-1.1.0/src/coderay/graph/README.md +0 -58
- coderay-1.1.0/src/coderay/graph/code_graph.py +0 -500
- coderay-1.1.0/src/coderay/graph/extractor.py +0 -67
- coderay-1.1.0/src/coderay/graph/file_context.py +0 -140
- coderay-1.1.0/src/coderay/graph/passes/global_passes.py +0 -10
- coderay-1.1.0/src/coderay/graph/pipeline.py +0 -17
- coderay-1.1.0/src/coderay/graph/plugin_protocol.py +0 -62
- coderay-1.1.0/src/coderay/graph/plugins/__init__.py +0 -1
- coderay-1.1.0/src/coderay/graph/plugins/base/__init__.py +0 -10
- coderay-1.1.0/src/coderay/graph/plugins/base/extractor.py +0 -142
- coderay-1.1.0/src/coderay/graph/plugins/base/handlers/__init__.py +0 -13
- coderay-1.1.0/src/coderay/graph/plugins/base/handlers/assignments.py +0 -137
- coderay-1.1.0/src/coderay/graph/plugins/base/handlers/calls.py +0 -304
- coderay-1.1.0/src/coderay/graph/plugins/base/handlers/definitions.py +0 -143
- coderay-1.1.0/src/coderay/graph/plugins/base/handlers/type_resolution.py +0 -202
- coderay-1.1.0/src/coderay/graph/plugins/base/plugin.py +0 -43
- coderay-1.1.0/src/coderay/graph/plugins/js_ts/__init__.py +0 -8
- coderay-1.1.0/src/coderay/graph/plugins/js_ts/extractor.py +0 -65
- coderay-1.1.0/src/coderay/graph/plugins/js_ts/import_handler.py +0 -106
- coderay-1.1.0/src/coderay/graph/plugins/python/__init__.py +0 -7
- coderay-1.1.0/src/coderay/graph/plugins/python/extractor.py +0 -280
- coderay-1.1.0/src/coderay/graph/plugins/python/import_handler.py +0 -139
- coderay-1.1.0/src/coderay/graph/registry.py +0 -23
- coderay-1.1.0/src/coderay/graph/resolution.py +0 -16
- coderay-1.1.0/src/coderay/parsing/conventions.py +0 -13
- coderay-1.1.0/src/coderay/skeleton/extractor.py +0 -281
- {coderay-1.1.0 → coderay-1.2.0}/LICENSE +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/MANIFEST.in +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/NOTICE +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/setup.cfg +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/chunking/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/chunking/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/chunking/chunker.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/cli/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/cli/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/cli/search_input.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/config.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/defaults/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/defaults/default.coderay.toml +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/errors.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/index_workspace.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/lock.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/models.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/core/utils.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/backend_resolve.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/format.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/embedding/prefixes.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/graph/passes/__init__.py +0 -0
- /coderay-1.1.0/src/coderay/graph/plugins/python/passes.py → /coderay-1.2.0/src/coderay/graph/passes/python.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/mcp_server/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/mcp_server/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/mcp_server/errors.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/parsing/cst_kind.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/pipeline/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/pipeline/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/pipeline/watcher.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/retrieval/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/retrieval/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/retrieval/boosting.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/retrieval/models.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/retrieval/search.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/skeleton/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/state/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/state/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/state/machine.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/state/version.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/storage/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/storage/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/storage/lancedb.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/vcs/README.md +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay/vcs/__init__.py +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay.egg-info/dependency_links.txt +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay.egg-info/entry_points.txt +0 -0
- {coderay-1.1.0 → coderay-1.2.0}/src/coderay.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderay
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
|
|
5
5
|
Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -41,6 +41,7 @@ Requires-Dist: pytest>=7.0; extra == "dev"
|
|
|
41
41
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
42
42
|
Requires-Dist: ruff>=0.8.0; extra == "dev"
|
|
43
43
|
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
44
|
+
Requires-Dist: tiktoken>=0.5.0; extra == "dev"
|
|
44
45
|
Provides-Extra: maintain
|
|
45
46
|
Requires-Dist: pylance>=0.15.0; extra == "maintain"
|
|
46
47
|
Provides-Extra: mlx
|
|
@@ -56,38 +57,71 @@ Dynamic: license-file
|
|
|
56
57
|
[](LICENSE)
|
|
57
58
|
[](https://github.com/bogdan-copocean/coderay/actions/workflows/ci.yml)
|
|
58
59
|
|
|
59
|
-
**CodeRay**
|
|
60
|
+
**CodeRay** builds a **local code index** that gives AI agents a smarter way to explore a codebase — reading only what they need, not whole files.
|
|
60
61
|
|
|
61
|
-
**No LLM
|
|
62
|
+
**Runs locally. No LLM. No network. No API key.**
|
|
62
63
|
|
|
64
|
+
## The problem
|
|
63
65
|
|
|
64
|
-
|
|
66
|
+
AI agents exploring a codebase default to reading whole files – even when one function is all that's needed. Every unnecessary line **burns tokens** and **floods the context window**: driving up API costs and noise with every read.
|
|
67
|
+
|
|
68
|
+
The root cause is simple: agents know the **file paths** but no finer location. Without knowing *where* in a file something lives, they have no choice but to read everything.
|
|
69
|
+
|
|
70
|
+
**CodeRay fixes this.** Every tool returns **file paths with exact line ranges** — so agents locate first, then read only the lines that matter.
|
|
71
|
+
|
|
72
|
+
## How it works
|
|
73
|
+
|
|
74
|
+
CodeRay exposes three primitives, each returning **paths + line ranges**:
|
|
75
|
+
|
|
76
|
+
| Tool | Question it answers | What agents get |
|
|
77
|
+
|------|---------------------|-----------------|
|
|
78
|
+
| **search** | *Where is the code that does X?* | Relevant chunks with file paths and line ranges |
|
|
79
|
+
| **skeleton** | *What's the shape of this file?* | Signatures + docstrings only, each tagged with its line range |
|
|
80
|
+
| **impact** | *What breaks if I change this?* | Callers, imports, and inheritors — located by line range |
|
|
81
|
+
|
|
82
|
+
### The two-phase flow
|
|
65
83
|
|
|
66
|
-
**
|
|
84
|
+
1. **Locate** — run `search`, `skeleton`, or `impact` to find what's needed. Every result includes a file path and a symbol-level line range.
|
|
85
|
+
2. **Read precisely** — use those line ranges to load only the relevant snippet. Skip the rest.
|
|
67
86
|
|
|
68
|
-
|
|
87
|
+
This keeps context windows lean and agent reasoning focused. CodeRay is not a replacement for `grep` — it fills the gap when exact names are unknown or a map is needed before reading.
|
|
88
|
+
|
|
89
|
+
### Token savings (tiktoken, `cl100k_base`)
|
|
90
|
+
|
|
91
|
+
| File | Lines | Full read | Skeleton | Savings | % reduction |
|
|
92
|
+
|------|-------|-----------|----------|---------|-------------|
|
|
93
|
+
| `src/coderay/graph/impact.py` | 249 | 2,333 | 693 | **3.4×** | **70%** |
|
|
94
|
+
| `src/coderay/cli/commands.py` | 584 | 4,327 | 1,906 | **2.3×** | **56%** |
|
|
95
|
+
| `src/coderay/pipeline/indexer.py` | 408 | 3,065 | 1,433 | **2.1×** | **53%** |
|
|
96
|
+
|
|
97
|
+
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
98
|
+
|-------|-------------------|---------------------------|
|
|
99
|
+
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
## Tools
|
|
69
103
|
|
|
70
|
-
|
|
104
|
+
### Semantic search
|
|
71
105
|
|
|
72
|
-
|
|
106
|
+
Agents search by **meaning**, not by name — useful when the exact function or class is unknown. Results return **file paths with line ranges** pointing at relevant chunks. Treat them as candidates: confirm with `skeleton` or a ranged read before acting. Keep the index fresh with `coderay watch` or `coderay build` when the tree drifts.
|
|
73
107
|
|
|
74
108
|
<img src="assets/coderay-search.gif" alt="coderay search demo" width="100%" />
|
|
75
109
|
|
|
76
110
|
### Blast radius
|
|
77
111
|
|
|
78
|
-
|
|
112
|
+
Shows **callers, imports, and inheritance** for a symbol before it changes. Each result is tied to a file path and line range — combine with `skeleton` or ranged reads on those locations when bodies are needed.
|
|
79
113
|
|
|
80
114
|
<img src="assets/coderay-impact.gif" alt="coderay impact demo" width="100%" />
|
|
81
115
|
|
|
82
116
|
### Skeleton
|
|
83
117
|
|
|
84
|
-
|
|
118
|
+
Returns **signatures and docstrings only** — no function bodies. Every block is tagged with its path and line range so subsequent reads can be scoped to exactly those lines. A full file read should happen only when the skeleton isn't enough.
|
|
85
119
|
|
|
86
120
|
<img src="assets/coderay-skeleton.gif" alt="coderay skeleton demo" width="100%" />
|
|
87
121
|
|
|
88
122
|
### Full read
|
|
89
123
|
|
|
90
|
-
Same file as skeleton: raw source costs more tokens.
|
|
124
|
+
**Same file, raw source — for comparison:**
|
|
91
125
|
|
|
92
126
|
<img src="assets/coderay-fullread.gif" alt="same file, raw source head" width="100%" />
|
|
93
127
|
|
|
@@ -102,7 +136,7 @@ Same file as skeleton: raw source costs more tokens.
|
|
|
102
136
|
|
|
103
137
|
## MCP
|
|
104
138
|
|
|
105
|
-
Same tools
|
|
139
|
+
Same three tools over MCP: search, skeleton (paths and line ranges), and impact—so **AI agents** can **narrow context** before full-file reads. Point the server at a checkout whose root contains `.coderay.toml` (`CODERAY_REPO_ROOT` below). For tool choice versus a plain read, see [AGENTS.md](AGENTS.md).
|
|
106
140
|
|
|
107
141
|
```bash
|
|
108
142
|
which coderay-mcp
|
|
@@ -123,32 +157,6 @@ which coderay-mcp
|
|
|
123
157
|
`CODERAY_REPO_ROOT` must be the directory that contains `.coderay.toml`. More detail: [`mcp_server/README.md`](src/coderay/mcp_server/README.md).
|
|
124
158
|
|
|
125
159
|
|
|
126
|
-
## Why this matters
|
|
127
|
-
|
|
128
|
-
Noisy context windows make models confident about the wrong code. CodeRay front-loads **intent** (search), **shape** (skeleton), and **dependencies** (impact) so the expensive read happens after you have a map—not instead of ever reading implementation when control flow matters.
|
|
129
|
-
|
|
130
|
-
### Token savings (tiktoken, `cl100k_base`)
|
|
131
|
-
|
|
132
|
-
Measured on this repo after a full index.
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
| File | Lines | Full read | Skeleton | Savings |
|
|
136
|
-
| ---------------------------------- | ----- | --------- | -------- | -------- |
|
|
137
|
-
| `src/coderay/pipeline/indexer.py` | 400 | 3,024 | 757 | **4.0x** |
|
|
138
|
-
| `src/coderay/graph/code_graph.py` | 500 | 4,261 | 1,022 | **4.2x** |
|
|
139
|
-
| `src/coderay/mcp_server/server.py` | 316 | 2,268 | 1,313 | **1.7x** |
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
144
|
-
| ------------------------------------ | ----------------- | ------------------------- |
|
|
145
|
-
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
*Not guarantees — model, chunks, and files affect counts.*
|
|
149
|
-
|
|
150
|
-
---
|
|
151
|
-
|
|
152
160
|
## Features
|
|
153
161
|
|
|
154
162
|
- **Languages** — Python, JavaScript, and TypeScript — [`parsing/README.md`](src/coderay/parsing/README.md)
|
|
@@ -157,7 +165,6 @@ Measured on this repo after a full index.
|
|
|
157
165
|
- **Embeddings** — fastembed (CPU) or MLX on Apple Silicon — [`embedding/README.md`](src/coderay/embedding/README.md)
|
|
158
166
|
- **Watch** — incremental re-index; `.coderay.toml` is the source of truth for what’s indexed
|
|
159
167
|
|
|
160
|
-
---
|
|
161
168
|
|
|
162
169
|
## Install
|
|
163
170
|
|
|
@@ -4,38 +4,71 @@
|
|
|
4
4
|
[](LICENSE)
|
|
5
5
|
[](https://github.com/bogdan-copocean/coderay/actions/workflows/ci.yml)
|
|
6
6
|
|
|
7
|
-
**CodeRay**
|
|
7
|
+
**CodeRay** builds a **local code index** that gives AI agents a smarter way to explore a codebase — reading only what they need, not whole files.
|
|
8
8
|
|
|
9
|
-
**No LLM
|
|
9
|
+
**Runs locally. No LLM. No network. No API key.**
|
|
10
10
|
|
|
11
|
+
## The problem
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
AI agents exploring a codebase default to reading whole files – even when one function is all that's needed. Every unnecessary line **burns tokens** and **floods the context window**: driving up API costs and noise with every read.
|
|
14
|
+
|
|
15
|
+
The root cause is simple: agents know the **file paths** but no finer location. Without knowing *where* in a file something lives, they have no choice but to read everything.
|
|
16
|
+
|
|
17
|
+
**CodeRay fixes this.** Every tool returns **file paths with exact line ranges** — so agents locate first, then read only the lines that matter.
|
|
18
|
+
|
|
19
|
+
## How it works
|
|
20
|
+
|
|
21
|
+
CodeRay exposes three primitives, each returning **paths + line ranges**:
|
|
22
|
+
|
|
23
|
+
| Tool | Question it answers | What agents get |
|
|
24
|
+
|------|---------------------|-----------------|
|
|
25
|
+
| **search** | *Where is the code that does X?* | Relevant chunks with file paths and line ranges |
|
|
26
|
+
| **skeleton** | *What's the shape of this file?* | Signatures + docstrings only, each tagged with its line range |
|
|
27
|
+
| **impact** | *What breaks if I change this?* | Callers, imports, and inheritors — located by line range |
|
|
28
|
+
|
|
29
|
+
### The two-phase flow
|
|
13
30
|
|
|
14
|
-
**
|
|
31
|
+
1. **Locate** — run `search`, `skeleton`, or `impact` to find what's needed. Every result includes a file path and a symbol-level line range.
|
|
32
|
+
2. **Read precisely** — use those line ranges to load only the relevant snippet. Skip the rest.
|
|
15
33
|
|
|
16
|
-
|
|
34
|
+
This keeps context windows lean and agent reasoning focused. CodeRay is not a replacement for `grep` — it fills the gap when exact names are unknown or a map is needed before reading.
|
|
35
|
+
|
|
36
|
+
### Token savings (tiktoken, `cl100k_base`)
|
|
37
|
+
|
|
38
|
+
| File | Lines | Full read | Skeleton | Savings | % reduction |
|
|
39
|
+
|------|-------|-----------|----------|---------|-------------|
|
|
40
|
+
| `src/coderay/graph/impact.py` | 249 | 2,333 | 693 | **3.4×** | **70%** |
|
|
41
|
+
| `src/coderay/cli/commands.py` | 584 | 4,327 | 1,906 | **2.3×** | **56%** |
|
|
42
|
+
| `src/coderay/pipeline/indexer.py` | 408 | 3,065 | 1,433 | **2.1×** | **53%** |
|
|
43
|
+
|
|
44
|
+
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
45
|
+
|-------|-------------------|---------------------------|
|
|
46
|
+
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Tools
|
|
17
50
|
|
|
18
|
-
|
|
51
|
+
### Semantic search
|
|
19
52
|
|
|
20
|
-
|
|
53
|
+
Agents search by **meaning**, not by name — useful when the exact function or class is unknown. Results return **file paths with line ranges** pointing at relevant chunks. Treat them as candidates: confirm with `skeleton` or a ranged read before acting. Keep the index fresh with `coderay watch` or `coderay build` when the tree drifts.
|
|
21
54
|
|
|
22
55
|
<img src="assets/coderay-search.gif" alt="coderay search demo" width="100%" />
|
|
23
56
|
|
|
24
57
|
### Blast radius
|
|
25
58
|
|
|
26
|
-
|
|
59
|
+
Shows **callers, imports, and inheritance** for a symbol before it changes. Each result is tied to a file path and line range — combine with `skeleton` or ranged reads on those locations when bodies are needed.
|
|
27
60
|
|
|
28
61
|
<img src="assets/coderay-impact.gif" alt="coderay impact demo" width="100%" />
|
|
29
62
|
|
|
30
63
|
### Skeleton
|
|
31
64
|
|
|
32
|
-
|
|
65
|
+
Returns **signatures and docstrings only** — no function bodies. Every block is tagged with its path and line range so subsequent reads can be scoped to exactly those lines. A full file read should happen only when the skeleton isn't enough.
|
|
33
66
|
|
|
34
67
|
<img src="assets/coderay-skeleton.gif" alt="coderay skeleton demo" width="100%" />
|
|
35
68
|
|
|
36
69
|
### Full read
|
|
37
70
|
|
|
38
|
-
Same file as skeleton: raw source costs more tokens.
|
|
71
|
+
**Same file, raw source — for comparison:**
|
|
39
72
|
|
|
40
73
|
<img src="assets/coderay-fullread.gif" alt="same file, raw source head" width="100%" />
|
|
41
74
|
|
|
@@ -50,7 +83,7 @@ Same file as skeleton: raw source costs more tokens.
|
|
|
50
83
|
|
|
51
84
|
## MCP
|
|
52
85
|
|
|
53
|
-
Same tools
|
|
86
|
+
Same three tools over MCP: search, skeleton (paths and line ranges), and impact—so **AI agents** can **narrow context** before full-file reads. Point the server at a checkout whose root contains `.coderay.toml` (`CODERAY_REPO_ROOT` below). For tool choice versus a plain read, see [AGENTS.md](AGENTS.md).
|
|
54
87
|
|
|
55
88
|
```bash
|
|
56
89
|
which coderay-mcp
|
|
@@ -71,32 +104,6 @@ which coderay-mcp
|
|
|
71
104
|
`CODERAY_REPO_ROOT` must be the directory that contains `.coderay.toml`. More detail: [`mcp_server/README.md`](src/coderay/mcp_server/README.md).
|
|
72
105
|
|
|
73
106
|
|
|
74
|
-
## Why this matters
|
|
75
|
-
|
|
76
|
-
Noisy context windows make models confident about the wrong code. CodeRay front-loads **intent** (search), **shape** (skeleton), and **dependencies** (impact) so the expensive read happens after you have a map—not instead of ever reading implementation when control flow matters.
|
|
77
|
-
|
|
78
|
-
### Token savings (tiktoken, `cl100k_base`)
|
|
79
|
-
|
|
80
|
-
Measured on this repo after a full index.
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
| File | Lines | Full read | Skeleton | Savings |
|
|
84
|
-
| ---------------------------------- | ----- | --------- | -------- | -------- |
|
|
85
|
-
| `src/coderay/pipeline/indexer.py` | 400 | 3,024 | 757 | **4.0x** |
|
|
86
|
-
| `src/coderay/graph/code_graph.py` | 500 | 4,261 | 1,022 | **4.2x** |
|
|
87
|
-
| `src/coderay/mcp_server/server.py` | 316 | 2,268 | 1,313 | **1.7x** |
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
92
|
-
| ------------------------------------ | ----------------- | ------------------------- |
|
|
93
|
-
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
*Not guarantees — model, chunks, and files affect counts.*
|
|
97
|
-
|
|
98
|
-
---
|
|
99
|
-
|
|
100
107
|
## Features
|
|
101
108
|
|
|
102
109
|
- **Languages** — Python, JavaScript, and TypeScript — [`parsing/README.md`](src/coderay/parsing/README.md)
|
|
@@ -105,7 +112,6 @@ Measured on this repo after a full index.
|
|
|
105
112
|
- **Embeddings** — fastembed (CPU) or MLX on Apple Silicon — [`embedding/README.md`](src/coderay/embedding/README.md)
|
|
106
113
|
- **Watch** — incremental re-index; `.coderay.toml` is the source of truth for what’s indexed
|
|
107
114
|
|
|
108
|
-
---
|
|
109
115
|
|
|
110
116
|
## Install
|
|
111
117
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "coderay"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
description = "X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -53,6 +53,7 @@ dev = [
|
|
|
53
53
|
"pytest-cov>=4.0",
|
|
54
54
|
"ruff>=0.8.0",
|
|
55
55
|
"mypy>=1.0.0",
|
|
56
|
+
"tiktoken>=0.5.0",
|
|
56
57
|
]
|
|
57
58
|
maintain = [
|
|
58
59
|
"pylance>=0.15.0",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.0"
|
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
6
|
import time
|
|
7
|
+
import warnings
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
|
|
9
10
|
import click
|
|
@@ -51,6 +52,12 @@ def _setup_logging(verbose: bool = False) -> None:
|
|
|
51
52
|
):
|
|
52
53
|
logging.getLogger(name).setLevel(logging.WARNING)
|
|
53
54
|
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
|
|
55
|
+
warnings.filterwarnings(
|
|
56
|
+
"ignore",
|
|
57
|
+
message="Cannot enable progress bars: environment variable",
|
|
58
|
+
category=UserWarning,
|
|
59
|
+
module="huggingface_hub.utils.tqdm",
|
|
60
|
+
)
|
|
54
61
|
|
|
55
62
|
|
|
56
63
|
def _set_repo_root(repo_root: Path) -> None:
|
|
@@ -333,7 +340,7 @@ def maintain(ctx: click.Context) -> None:
|
|
|
333
340
|
|
|
334
341
|
|
|
335
342
|
@cli.command()
|
|
336
|
-
@click.argument("file_path", type=
|
|
343
|
+
@click.argument("file_path", type=str)
|
|
337
344
|
@click.option(
|
|
338
345
|
"--include-imports",
|
|
339
346
|
is_flag=True,
|
|
@@ -346,17 +353,53 @@ def maintain(ctx: click.Context) -> None:
|
|
|
346
353
|
default=None,
|
|
347
354
|
help="Filter to a specific class or top-level function by name.",
|
|
348
355
|
)
|
|
356
|
+
@click.option(
|
|
357
|
+
"--lines",
|
|
358
|
+
"line_range",
|
|
359
|
+
default=None,
|
|
360
|
+
metavar="START-END",
|
|
361
|
+
help=(
|
|
362
|
+
"File line range (1-based inclusive); keep only symbols fully within this span."
|
|
363
|
+
" Do not combine with a :START-END suffix on FILE_PATH (same meaning)."
|
|
364
|
+
),
|
|
365
|
+
)
|
|
349
366
|
def skeleton(
|
|
350
|
-
file_path:
|
|
367
|
+
file_path: str,
|
|
351
368
|
include_imports: bool,
|
|
352
369
|
symbol: str | None,
|
|
370
|
+
line_range: str | None,
|
|
353
371
|
) -> None:
|
|
354
372
|
"""Print signatures without bodies (cheaper than reading the full file)."""
|
|
355
373
|
from coderay.skeleton.extractor import extract_skeleton
|
|
374
|
+
from coderay.skeleton.path_range import (
|
|
375
|
+
parse_file_line_range,
|
|
376
|
+
parse_skeleton_file_arg,
|
|
377
|
+
)
|
|
356
378
|
|
|
357
|
-
|
|
379
|
+
try:
|
|
380
|
+
path_str, rng_from_path = parse_skeleton_file_arg(file_path, parse_suffix=True)
|
|
381
|
+
except ValueError as e:
|
|
382
|
+
raise click.BadParameter(str(e)) from e
|
|
383
|
+
file_line_range = rng_from_path
|
|
384
|
+
if line_range:
|
|
385
|
+
if file_line_range is not None:
|
|
386
|
+
raise click.UsageError(
|
|
387
|
+
"Use either a path ending with :START-END or --lines, not both."
|
|
388
|
+
)
|
|
389
|
+
try:
|
|
390
|
+
file_line_range = parse_file_line_range(line_range)
|
|
391
|
+
except ValueError as e:
|
|
392
|
+
raise click.BadParameter(str(e), param_hint="--lines") from e
|
|
393
|
+
resolved = Path(path_str)
|
|
394
|
+
if not resolved.is_file():
|
|
395
|
+
raise click.BadParameter(f"not a file: {path_str}", param_hint="file_path")
|
|
396
|
+
content = resolved.read_text(encoding="utf-8", errors="replace")
|
|
358
397
|
out = extract_skeleton(
|
|
359
|
-
|
|
398
|
+
resolved,
|
|
399
|
+
content,
|
|
400
|
+
include_imports=include_imports,
|
|
401
|
+
symbol=symbol,
|
|
402
|
+
line_range=file_line_range,
|
|
360
403
|
)
|
|
361
404
|
click.echo(out)
|
|
362
405
|
|
|
@@ -30,7 +30,7 @@ def timed(phase: str) -> Callable[[F], F]:
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class TimedPhase:
|
|
33
|
-
"""Context manager: measure block execution time."""
|
|
33
|
+
"""Context manager: measure block execution time; log completion at DEBUG."""
|
|
34
34
|
|
|
35
35
|
def __init__(self, phase: str, *, log: bool = True) -> None:
|
|
36
36
|
self.phase = phase
|
|
@@ -42,10 +42,15 @@ class TimedPhase:
|
|
|
42
42
|
self.t0 = time.perf_counter()
|
|
43
43
|
return self
|
|
44
44
|
|
|
45
|
+
def elapsed_so_far(self) -> float:
|
|
46
|
+
"""Return seconds since __enter__ (before __exit__)."""
|
|
47
|
+
|
|
48
|
+
return time.perf_counter() - self.t0
|
|
49
|
+
|
|
45
50
|
def __exit__(self, *args: object) -> None:
|
|
46
51
|
self.elapsed = time.perf_counter() - self.t0
|
|
47
52
|
if self.log:
|
|
48
|
-
logger.
|
|
53
|
+
logger.debug("%s: %.3fs", self.phase, self.elapsed)
|
|
49
54
|
|
|
50
55
|
|
|
51
56
|
timed_phase = TimedPhase # Convenience alias for context manager usage
|
|
@@ -48,13 +48,13 @@ def load_embedder_from_config() -> Embedder:
|
|
|
48
48
|
config = get_config()
|
|
49
49
|
ed = config.embedder
|
|
50
50
|
backend = resolved_embedder_backend(ed.backend)
|
|
51
|
-
if (ed.backend or "auto").strip().lower() == "auto":
|
|
52
|
-
logger.info("embedder.backend=auto -> %s", backend)
|
|
53
51
|
if backend == "mlx" and not mlx_optional_installed():
|
|
54
52
|
raise RuntimeError(
|
|
55
53
|
"embedder.backend is 'mlx' but MLX is not installed. "
|
|
56
54
|
"On Apple Silicon: pip install 'coderay[mlx]'"
|
|
57
55
|
)
|
|
56
|
+
model_name = ed.mlx.model_name if backend == "mlx" else ed.fastembed.model_name
|
|
57
|
+
logger.info("embedder.backend=%s model=%s", backend, model_name)
|
|
58
58
|
if backend == "mlx":
|
|
59
59
|
mx = ed.mlx
|
|
60
60
|
return MLXEmbedder(
|
|
@@ -5,6 +5,7 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
from onnxruntime.capi.onnxruntime_pybind11_state import NoSuchFile
|
|
7
7
|
|
|
8
|
+
from coderay.core.timing import timed_phase
|
|
8
9
|
from coderay.embedding.base import Embedder, EmbedTask
|
|
9
10
|
from coderay.embedding.prefixes import SEARCH_PREFIXES, requires_prefix
|
|
10
11
|
|
|
@@ -38,15 +39,14 @@ class LocalEmbedder(Embedder):
|
|
|
38
39
|
return TextEmbedding(model_name=name, local_files_only=local_only)
|
|
39
40
|
|
|
40
41
|
try:
|
|
41
|
-
logger.info("Loading model %s from cache...", self._model_name)
|
|
42
42
|
self._model = _open(name=self._model_name, local_only=True)
|
|
43
|
-
logger.info("Model %s loaded from cache.", self._model_name)
|
|
44
43
|
except (NoSuchFile, ValueError) as e:
|
|
45
44
|
if isinstance(e, ValueError) and "Could not load model" not in str(e):
|
|
46
45
|
raise
|
|
47
|
-
logger.info("Downloading model %s (one-time)...", self._model_name)
|
|
48
46
|
self._model = _open(name=self._model_name, local_only=False)
|
|
49
|
-
logger.info("Model %s downloaded
|
|
47
|
+
logger.info("Model %s ready (downloaded).", self._model_name)
|
|
48
|
+
else:
|
|
49
|
+
logger.info("Model %s ready (cache).", self._model_name)
|
|
50
50
|
|
|
51
51
|
def _apply_prefix(self, texts: list[str], task: EmbedTask) -> list[str]:
|
|
52
52
|
if not requires_prefix(self._model_name):
|
|
@@ -66,9 +66,21 @@ class LocalEmbedder(Embedder):
|
|
|
66
66
|
self._load_model()
|
|
67
67
|
|
|
68
68
|
prefixed = self._apply_prefix(texts, task)
|
|
69
|
-
|
|
70
|
-
logger.info("Embedding %d chunks (task=%s)...",
|
|
71
|
-
|
|
69
|
+
n = len(prefixed)
|
|
70
|
+
logger.info("Embedding %d chunks (task=%s)...", n, task.value)
|
|
71
|
+
raw: list[Any] = []
|
|
72
|
+
bs = self._batch_size
|
|
73
|
+
with timed_phase("embedding", log=False) as tp:
|
|
74
|
+
for i in range(0, n, bs):
|
|
75
|
+
sub = prefixed[i : i + bs]
|
|
76
|
+
part = list(self._model.embed(sub, batch_size=self._batch_size))
|
|
77
|
+
raw.extend(part)
|
|
78
|
+
logger.info("Embedded %d/%d chunks", min(i + len(sub), n), n)
|
|
79
|
+
logger.info(
|
|
80
|
+
"Embedding complete: %d chunks in %.2fs",
|
|
81
|
+
n,
|
|
82
|
+
tp.elapsed,
|
|
83
|
+
)
|
|
72
84
|
if self._matryoshka_dimensions is not None:
|
|
73
|
-
return [e.tolist()[: self._matryoshka_dimensions] for e in
|
|
74
|
-
return [e.tolist() for e in
|
|
85
|
+
return [e.tolist()[: self._matryoshka_dimensions] for e in raw]
|
|
86
|
+
return [e.tolist() for e in raw]
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
+
from coderay.core.timing import timed_phase
|
|
3
4
|
from coderay.embedding.base import Embedder, EmbedTask
|
|
4
5
|
from coderay.embedding.prefixes import SEARCH_PREFIXES, requires_prefix
|
|
5
6
|
|
|
@@ -44,6 +45,7 @@ class MLXEmbedder(Embedder):
|
|
|
44
45
|
prefix = SEARCH_PREFIXES.get(task, "")
|
|
45
46
|
texts = [prefix + t for t in texts] if prefix else texts
|
|
46
47
|
|
|
48
|
+
logger.info("Embedding %d chunks (task=%s)...", len(texts), task.value)
|
|
47
49
|
return self._embed_batched(texts)
|
|
48
50
|
|
|
49
51
|
def _ensure_loaded(self) -> None:
|
|
@@ -53,20 +55,13 @@ class MLXEmbedder(Embedder):
|
|
|
53
55
|
from mlx_embeddings import load
|
|
54
56
|
|
|
55
57
|
cached = self._is_cached()
|
|
56
|
-
if cached:
|
|
57
|
-
logger.info(
|
|
58
|
-
"Loading model %s from cache (%s)...",
|
|
59
|
-
self._model_name,
|
|
60
|
-
mx.default_device(),
|
|
61
|
-
)
|
|
62
|
-
else:
|
|
63
|
-
logger.info(
|
|
64
|
-
"Downloading model %s (one-time, %s)...",
|
|
65
|
-
self._model_name,
|
|
66
|
-
mx.default_device(),
|
|
67
|
-
)
|
|
68
58
|
self._model, self._tokenizer = load(self._model_name)
|
|
69
|
-
logger.info(
|
|
59
|
+
logger.info(
|
|
60
|
+
"Model %s ready (%s, %s).",
|
|
61
|
+
self._model_name,
|
|
62
|
+
"cache" if cached else "downloaded",
|
|
63
|
+
mx.default_device(),
|
|
64
|
+
)
|
|
70
65
|
|
|
71
66
|
def _is_cached(self) -> bool:
|
|
72
67
|
"""Check if model exists in huggingface cache."""
|
|
@@ -83,12 +78,18 @@ class MLXEmbedder(Embedder):
|
|
|
83
78
|
n = len(texts)
|
|
84
79
|
bs = self._batch_size
|
|
85
80
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
81
|
+
with timed_phase("embedding", log=False) as tp:
|
|
82
|
+
for i in range(0, n, bs):
|
|
83
|
+
batch = texts[i : i + bs]
|
|
84
|
+
arr = self._embed_single_batch(batch)
|
|
85
|
+
out.extend(arr.tolist())
|
|
86
|
+
logger.info("Embedded %d/%d chunks", min(i + bs, n), n)
|
|
87
|
+
|
|
88
|
+
logger.info(
|
|
89
|
+
"Embedding complete: %d chunks in %.2fs",
|
|
90
|
+
n,
|
|
91
|
+
tp.elapsed,
|
|
92
|
+
)
|
|
92
93
|
return out
|
|
93
94
|
|
|
94
95
|
def _embed_single_batch(self, batch: list[str]):
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# graph
|
|
2
|
+
|
|
3
|
+
Directed **calls**, **imports**, and **inheritance** over indexed source. The implementation is laid out as extractors, lowering, merge, and post-merge passes in this package; this file describes **behavior**, not file names.
|
|
4
|
+
|
|
5
|
+
## Pipeline (conceptual)
|
|
6
|
+
|
|
7
|
+
Per file: CST → **facts** (definitions, calls, imports, inherits) → **materialise** into `GraphNode` / `GraphEdge`. Multi-file **merge** builds one `CodeGraph`. **Post-merge** runs language passes and global rewrites (e.g. resolving bare-name call targets when unambiguous repo-wide).
|
|
8
|
+
|
|
9
|
+
Cross-file lowering uses a **module index** (dotted name → file path) so imports and qualified names can become `file_path::symbol` targets. Edges may point at **phantom** strings (unresolved callee) until passes or later tooling refine them.
|
|
10
|
+
|
|
11
|
+
## Targets and phantoms
|
|
12
|
+
|
|
13
|
+
Call/import/inherit **targets are strings**: resolved node ids (`file::qual`), module-style refs (`pkg.mod.sym`), or **phantoms** (short names, unknowns). Heuristics classify targets for filtering and UX; **materialise** can emit edges whose endpoints are not yet graph nodes.
|
|
14
|
+
|
|
15
|
+
**`include_external`** (config) drops edges whose targets are not considered “in repo” for the current index.
|
|
16
|
+
|
|
17
|
+
## Symbol resolution (`CodeGraph`)
|
|
18
|
+
|
|
19
|
+
Indexes map **short names** and **qualified names** to node ids. A **unique** short name resolves to one id; for an **ambiguous** short name, `resolve_symbol` returns `None` (callers must use the full id or disambiguate).
|
|
20
|
+
|
|
21
|
+
## Impact radius (`impact.py`)
|
|
22
|
+
|
|
23
|
+
**Reverse** traversal from a symbol: finds whatever **calls**, **imports**, or **inherits** from it, up to a **depth** limit. Not every edge kind is impact-relevant; module nodes are filtered out when the same file is already represented by concrete symbols.
|
|
24
|
+
|
|
25
|
+
**Resolution layers:** exact id → optional **fuzzy** match by trailing name within a file → hints when ambiguous or empty results. **Seeds** for a method can include the **parent class’s** same-named method when inheritance is present, so callers of the base implementation count toward impact on overrides. **Phantom aliases** (same symbol under different string ids) are considered so edges from re-exports or legacy shapes are not missed.
|
|
26
|
+
|
|
27
|
+
**Limitations:** static graph only—dynamic dispatch, reflection, and cross-repo callers are not modeled; hints may suggest grep when imports exist but call edges could not be resolved.
|
|
28
|
+
|
|
29
|
+
## Callee lowering (`CalleeResolver`)
|
|
30
|
+
|
|
31
|
+
Raw callee text from the tree (e.g. `self.m`, `super().x`, `a.b`) is combined with **per-file bindings** (imports, instance typing, scopes) to produce target strings. Order matters: **super** / **self** handling runs before generic **simple** and **dotted-chain** resolution. Behavior is shared across languages where configs align (`self`/`super` prefixes); edge cases differ by language grammar and binding richness.
|
|
32
|
+
|
|
33
|
+
## Known limitations (general)
|
|
34
|
+
|
|
35
|
+
- **Soundness:** graph is **heuristic**, not a type system; wrong or missing edges are expected under metaprogramming, conditional imports, and incomplete index scope.
|
|
36
|
+
- **Staleness:** the graph reflects the last build; use **watch** or rebuild after large refactors.
|
|
37
|
+
- **Language coverage:** depth varies by language (Python/JS/TS today); new languages plug in via the same fact/materialise/merge shape but need their own extractors and tests.
|
|
38
|
+
|
|
39
|
+
## Tests
|
|
40
|
+
|
|
41
|
+
[`tests/unit/graph/`](../../../tests/unit/graph/) (invariants, extractors, resolver), [`tests/regression/graph/`](../../../tests/regression/graph/) (multi-file fixtures).
|
|
42
|
+
|
|
43
|
+
## Persistence
|
|
44
|
+
|
|
45
|
+
The graph is stored as `graph.json` under the index directory; **`schema_version`** allows older serialised shapes to be loaded after the format version is bumped.
|
|
@@ -6,14 +6,15 @@ from coderay.graph.builder import (
|
|
|
6
6
|
save_graph,
|
|
7
7
|
)
|
|
8
8
|
from coderay.graph.code_graph import CodeGraph
|
|
9
|
-
from coderay.graph.
|
|
9
|
+
from coderay.graph.graph_builder import GraphBuilder, build_project_index
|
|
10
10
|
|
|
11
11
|
__all__ = [
|
|
12
12
|
"GRAPH_FILENAME",
|
|
13
13
|
"CodeGraph",
|
|
14
|
+
"GraphBuilder",
|
|
15
|
+
"build_project_index",
|
|
14
16
|
"build_and_save_graph",
|
|
15
17
|
"build_graph",
|
|
16
|
-
"extract_graph_from_file",
|
|
17
18
|
"load_graph",
|
|
18
19
|
"save_graph",
|
|
19
20
|
]
|