coderay 1.1.1__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderay-1.1.1/src/coderay.egg-info → coderay-1.2.1}/PKG-INFO +47 -38
- {coderay-1.1.1 → coderay-1.2.1}/README.md +45 -37
- {coderay-1.1.1 → coderay-1.2.1}/pyproject.toml +2 -1
- coderay-1.2.1/src/coderay/__init__.py +1 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/cli/commands.py +40 -4
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/defaults/default.coderay.toml +2 -2
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/README.md +21 -13
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/mcp_server/server.py +46 -9
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/parsing/languages.py +39 -20
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/skeleton/README.md +3 -2
- coderay-1.2.1/src/coderay/skeleton/extractor.py +402 -0
- coderay-1.2.1/src/coderay/skeleton/path_range.py +39 -0
- {coderay-1.1.1 → coderay-1.2.1/src/coderay.egg-info}/PKG-INFO +47 -38
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay.egg-info/SOURCES.txt +1 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay.egg-info/requires.txt +1 -0
- coderay-1.1.1/src/coderay/__init__.py +0 -1
- coderay-1.1.1/src/coderay/skeleton/extractor.py +0 -281
- {coderay-1.1.1 → coderay-1.2.1}/LICENSE +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/MANIFEST.in +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/NOTICE +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/setup.cfg +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/chunking/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/chunking/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/chunking/chunker.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/cli/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/cli/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/cli/search_input.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/config.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/defaults/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/errors.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/index_workspace.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/lock.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/models.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/timing.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/core/utils.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/backend_resolve.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/base.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/format.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/local.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/mlx_backend.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/embedding/prefixes.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/builder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/code_graph.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/extractors/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/extractors/base.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/extractors/js_ts/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/extractors/js_ts/extractor.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/extractors/python/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/extractors/python/extractor.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/facts.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/graph_builder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/assignment_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/call_emitter.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/decorator_emitter.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/definition_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/definition_emitter.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/helpers.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/js_ts/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/js_ts/import_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/js_ts/import_emitter.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/python/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/python/assignment_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/python/function_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/python/import_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/python/import_emitter.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/python/with_binder.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/typed_annotations.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/handlers/typed_params.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/impact.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/language_plugin.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/lowering/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/lowering/callee_resolver.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/lowering/callee_strategy.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/lowering/cst_helpers.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/lowering/name_bindings.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/materialise.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/passes/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/passes/python.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/passes/resolve_bare_phantoms.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/pipeline.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/project_index.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/refs.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/graph/utils.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/mcp_server/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/mcp_server/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/mcp_server/errors.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/parsing/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/parsing/base.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/parsing/conventions.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/parsing/cst_kind.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/parsing/cst_traversal.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/pipeline/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/pipeline/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/pipeline/indexer.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/pipeline/watcher.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/retrieval/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/retrieval/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/retrieval/boosting.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/retrieval/models.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/retrieval/search.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/skeleton/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/state/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/state/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/state/machine.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/state/version.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/storage/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/storage/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/storage/lancedb.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/vcs/README.md +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/vcs/__init__.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay/vcs/git.py +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay.egg-info/dependency_links.txt +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay.egg-info/entry_points.txt +0 -0
- {coderay-1.1.1 → coderay-1.2.1}/src/coderay.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderay
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
|
|
5
5
|
Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -41,6 +41,7 @@ Requires-Dist: pytest>=7.0; extra == "dev"
|
|
|
41
41
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
42
42
|
Requires-Dist: ruff>=0.8.0; extra == "dev"
|
|
43
43
|
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
44
|
+
Requires-Dist: tiktoken>=0.5.0; extra == "dev"
|
|
44
45
|
Provides-Extra: maintain
|
|
45
46
|
Requires-Dist: pylance>=0.15.0; extra == "maintain"
|
|
46
47
|
Provides-Extra: mlx
|
|
@@ -56,38 +57,71 @@ Dynamic: license-file
|
|
|
56
57
|
[](LICENSE)
|
|
57
58
|
[](https://github.com/bogdan-copocean/coderay/actions/workflows/ci.yml)
|
|
58
59
|
|
|
59
|
-
**CodeRay**
|
|
60
|
+
**CodeRay** builds a **local code index** that gives AI agents a smarter way to explore a codebase — reading only what they need, not whole files.
|
|
60
61
|
|
|
61
|
-
**No LLM
|
|
62
|
+
**Runs locally. No LLM. No network. No API key.**
|
|
62
63
|
|
|
64
|
+
## The problem
|
|
63
65
|
|
|
64
|
-
|
|
66
|
+
AI agents exploring a codebase default to reading whole files — even when one function is all that's needed. Every unnecessary line **burns tokens** and **floods the context window**, driving up API costs and noise with every read.
|
|
67
|
+
|
|
68
|
+
The root cause is simple: agents know the **file paths** but no finer location. Without knowing *where* in a file something lives, they have no choice but to read everything.
|
|
69
|
+
|
|
70
|
+
**CodeRay fixes this.** Every tool returns **file paths with exact line ranges** — so agents locate first, then read only the lines that matter.
|
|
71
|
+
|
|
72
|
+
## How it works
|
|
73
|
+
|
|
74
|
+
CodeRay exposes three primitives, each returning **paths + line ranges**:
|
|
75
|
+
|
|
76
|
+
| Tool | Question it answers | What agents get |
|
|
77
|
+
|------|---------------------|-----------------|
|
|
78
|
+
| **search** | *Where is the code that does X?* | Relevant chunks with file paths and line ranges |
|
|
79
|
+
| **skeleton** | *What's the shape of this file?* | Signatures + docstrings only, each tagged with its line range |
|
|
80
|
+
| **impact** | *What breaks if I change this?* | Callers, imports, and inheritors — located by line range |
|
|
81
|
+
|
|
82
|
+
### The two-phase flow
|
|
65
83
|
|
|
66
|
-
**
|
|
84
|
+
1. **Locate** — run `search`, `skeleton`, or `impact` to find what's needed. Every result includes a file path and a symbol-level line range.
|
|
85
|
+
2. **Read precisely** — use those line ranges to load only the relevant snippet. Skip the rest.
|
|
67
86
|
|
|
68
|
-
|
|
87
|
+
This keeps context windows lean and agent reasoning focused. CodeRay is not a replacement for `grep` — it fills the gap when exact names are unknown or a map is needed before reading.
|
|
88
|
+
|
|
89
|
+
### Token savings (tiktoken, `cl100k_base`)
|
|
90
|
+
|
|
91
|
+
| File | Lines | Full read | Skeleton | Savings | % reduction |
|
|
92
|
+
|------|-------|-----------|----------|---------|-------------|
|
|
93
|
+
| `src/coderay/graph/impact.py` | 249 | 2,333 | 693 | **3.4×** | **70%** |
|
|
94
|
+
| `src/coderay/cli/commands.py` | 584 | 4,327 | 1,906 | **2.3×** | **56%** |
|
|
95
|
+
| `src/coderay/pipeline/indexer.py` | 408 | 3,065 | 1,433 | **2.1×** | **53%** |
|
|
96
|
+
|
|
97
|
+
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
98
|
+
|-------|-------------------|---------------------------|
|
|
99
|
+
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
## Tools
|
|
69
103
|
|
|
70
|
-
|
|
104
|
+
### Semantic search
|
|
71
105
|
|
|
72
|
-
|
|
106
|
+
Agents search by **meaning**, not by name — useful when the exact function or class is unknown. Results return **file paths with line ranges** pointing at relevant chunks. Treat them as candidates: confirm with `skeleton` or a ranged read before acting. Keep the index fresh with `coderay watch` or `coderay build` when the tree drifts.
|
|
73
107
|
|
|
74
108
|
<img src="assets/coderay-search.gif" alt="coderay search demo" width="100%" />
|
|
75
109
|
|
|
76
110
|
### Blast radius
|
|
77
111
|
|
|
78
|
-
|
|
112
|
+
Shows **callers, imports, and inheritance** for a symbol before it changes. Each result is tied to a file path and line range — combine with `skeleton` or ranged reads on those locations when bodies are needed.
|
|
79
113
|
|
|
80
114
|
<img src="assets/coderay-impact.gif" alt="coderay impact demo" width="100%" />
|
|
81
115
|
|
|
82
116
|
### Skeleton
|
|
83
117
|
|
|
84
|
-
|
|
118
|
+
Returns **signatures and docstrings only** — no function bodies. Every block is tagged with its path and line range so subsequent reads can be scoped to exactly those lines. A full file read should happen only when the skeleton isn't enough.
|
|
85
119
|
|
|
86
120
|
<img src="assets/coderay-skeleton.gif" alt="coderay skeleton demo" width="100%" />
|
|
87
121
|
|
|
88
122
|
### Full read
|
|
89
123
|
|
|
90
|
-
Same file
|
|
124
|
+
**Same file, raw source — for comparison:**
|
|
91
125
|
|
|
92
126
|
<img src="assets/coderay-fullread.gif" alt="same file, raw source head" width="100%" />
|
|
93
127
|
|
|
@@ -102,7 +136,7 @@ Same file as skeleton: raw source costs more tokens.
|
|
|
102
136
|
|
|
103
137
|
## MCP
|
|
104
138
|
|
|
105
|
-
Same tools
|
|
139
|
+
Same three tools over MCP: search, skeleton (paths and line ranges), and impact—so **AI agents** can **narrow context** before full-file reads. Point the server at a checkout whose root contains `.coderay.toml` (`CODERAY_REPO_ROOT` below). For tool choice versus a plain read, see [AGENTS.md](AGENTS.md).
|
|
106
140
|
|
|
107
141
|
```bash
|
|
108
142
|
which coderay-mcp
|
|
@@ -123,37 +157,12 @@ which coderay-mcp
|
|
|
123
157
|
`CODERAY_REPO_ROOT` must be the directory that contains `.coderay.toml`. More detail: [`mcp_server/README.md`](src/coderay/mcp_server/README.md).
|
|
124
158
|
|
|
125
159
|
|
|
126
|
-
## Why this matters
|
|
127
|
-
|
|
128
|
-
Noisy context windows make models confident about the wrong code. CodeRay front-loads **intent** (search), **shape** (skeleton), and **dependencies** (impact) so the expensive read happens after you have a map—not instead of ever reading implementation when control flow matters.
|
|
129
|
-
|
|
130
|
-
### Token savings (tiktoken, `cl100k_base`)
|
|
131
|
-
|
|
132
|
-
Measured on this repo after a full index.
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
| File | Lines | Full read | Skeleton | Savings |
|
|
136
|
-
| ---------------------------------- | ----- | --------- | -------- | -------- |
|
|
137
|
-
| `src/coderay/pipeline/indexer.py` | 400 | 3,024 | 757 | **4.0x** |
|
|
138
|
-
| `src/coderay/graph/code_graph.py` | 500 | 4,261 | 1,022 | **4.2x** |
|
|
139
|
-
| `src/coderay/mcp_server/server.py` | 316 | 2,268 | 1,313 | **1.7x** |
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
144
|
-
| ------------------------------------ | ----------------- | ------------------------- |
|
|
145
|
-
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
*Not guarantees — model, chunks, and files affect counts.*
|
|
149
|
-
|
|
150
|
-
|
|
151
160
|
## Features
|
|
152
161
|
|
|
153
162
|
- **Languages** — Python, JavaScript, and TypeScript — [`parsing/README.md`](src/coderay/parsing/README.md)
|
|
154
163
|
- **Multi-repo / monorepo** — roots, aliases, optional `include` subtrees — [`core/README.md`](src/coderay/core/README.md)
|
|
155
164
|
- **Hybrid search** — vector + BM25 (RRF), optional boosting — [`retrieval/README.md`](src/coderay/retrieval/README.md)
|
|
156
|
-
- **Embeddings** — fastembed (CPU) or MLX on Apple Silicon — [`embedding/README.md`](src/coderay/embedding/README.md)
|
|
165
|
+
- **Embeddings** — fastembed (CPU) or MLX on Apple Silicon. Defaults to MiniLM L6 for speed; configure BGE in `.coderay.toml` for stronger (heavier) vectors — [`embedding/README.md`](src/coderay/embedding/README.md)
|
|
157
166
|
- **Watch** — incremental re-index; `.coderay.toml` is the source of truth for what’s indexed
|
|
158
167
|
|
|
159
168
|
|
|
@@ -4,38 +4,71 @@
|
|
|
4
4
|
[](LICENSE)
|
|
5
5
|
[](https://github.com/bogdan-copocean/coderay/actions/workflows/ci.yml)
|
|
6
6
|
|
|
7
|
-
**CodeRay**
|
|
7
|
+
**CodeRay** builds a **local code index** that gives AI agents a smarter way to explore a codebase — reading only what they need, not whole files.
|
|
8
8
|
|
|
9
|
-
**No LLM
|
|
9
|
+
**Runs locally. No LLM. No network. No API key.**
|
|
10
10
|
|
|
11
|
+
## The problem
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
AI agents exploring a codebase default to reading whole files — even when one function is all that's needed. Every unnecessary line **burns tokens** and **floods the context window**, driving up API costs and noise with every read.
|
|
14
|
+
|
|
15
|
+
The root cause is simple: agents know the **file paths** but no finer location. Without knowing *where* in a file something lives, they have no choice but to read everything.
|
|
16
|
+
|
|
17
|
+
**CodeRay fixes this.** Every tool returns **file paths with exact line ranges** — so agents locate first, then read only the lines that matter.
|
|
18
|
+
|
|
19
|
+
## How it works
|
|
20
|
+
|
|
21
|
+
CodeRay exposes three primitives, each returning **paths + line ranges**:
|
|
22
|
+
|
|
23
|
+
| Tool | Question it answers | What agents get |
|
|
24
|
+
|------|---------------------|-----------------|
|
|
25
|
+
| **search** | *Where is the code that does X?* | Relevant chunks with file paths and line ranges |
|
|
26
|
+
| **skeleton** | *What's the shape of this file?* | Signatures + docstrings only, each tagged with its line range |
|
|
27
|
+
| **impact** | *What breaks if I change this?* | Callers, imports, and inheritors — located by line range |
|
|
28
|
+
|
|
29
|
+
### The two-phase flow
|
|
13
30
|
|
|
14
|
-
**
|
|
31
|
+
1. **Locate** — run `search`, `skeleton`, or `impact` to find what's needed. Every result includes a file path and a symbol-level line range.
|
|
32
|
+
2. **Read precisely** — use those line ranges to load only the relevant snippet. Skip the rest.
|
|
15
33
|
|
|
16
|
-
|
|
34
|
+
This keeps context windows lean and agent reasoning focused. CodeRay is not a replacement for `grep` — it fills the gap when exact names are unknown or a map is needed before reading.
|
|
35
|
+
|
|
36
|
+
### Token savings (tiktoken, `cl100k_base`)
|
|
37
|
+
|
|
38
|
+
| File | Lines | Full read | Skeleton | Savings | % reduction |
|
|
39
|
+
|------|-------|-----------|----------|---------|-------------|
|
|
40
|
+
| `src/coderay/graph/impact.py` | 249 | 2,333 | 693 | **3.4×** | **70%** |
|
|
41
|
+
| `src/coderay/cli/commands.py` | 584 | 4,327 | 1,906 | **2.3×** | **56%** |
|
|
42
|
+
| `src/coderay/pipeline/indexer.py` | 408 | 3,065 | 1,433 | **2.1×** | **53%** |
|
|
43
|
+
|
|
44
|
+
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
45
|
+
|-------|-------------------|---------------------------|
|
|
46
|
+
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Tools
|
|
17
50
|
|
|
18
|
-
|
|
51
|
+
### Semantic search
|
|
19
52
|
|
|
20
|
-
|
|
53
|
+
Agents search by **meaning**, not by name — useful when the exact function or class is unknown. Results return **file paths with line ranges** pointing at relevant chunks. Treat them as candidates: confirm with `skeleton` or a ranged read before acting. Keep the index fresh with `coderay watch` or `coderay build` when the tree drifts.
|
|
21
54
|
|
|
22
55
|
<img src="assets/coderay-search.gif" alt="coderay search demo" width="100%" />
|
|
23
56
|
|
|
24
57
|
### Blast radius
|
|
25
58
|
|
|
26
|
-
|
|
59
|
+
Shows **callers, imports, and inheritance** for a symbol before it changes. Each result is tied to a file path and line range — combine with `skeleton` or ranged reads on those locations when bodies are needed.
|
|
27
60
|
|
|
28
61
|
<img src="assets/coderay-impact.gif" alt="coderay impact demo" width="100%" />
|
|
29
62
|
|
|
30
63
|
### Skeleton
|
|
31
64
|
|
|
32
|
-
|
|
65
|
+
Returns **signatures and docstrings only** — no function bodies. Every block is tagged with its path and line range so subsequent reads can be scoped to exactly those lines. A full file read should happen only when the skeleton isn't enough.
|
|
33
66
|
|
|
34
67
|
<img src="assets/coderay-skeleton.gif" alt="coderay skeleton demo" width="100%" />
|
|
35
68
|
|
|
36
69
|
### Full read
|
|
37
70
|
|
|
38
|
-
Same file
|
|
71
|
+
**Same file, raw source — for comparison:**
|
|
39
72
|
|
|
40
73
|
<img src="assets/coderay-fullread.gif" alt="same file, raw source head" width="100%" />
|
|
41
74
|
|
|
@@ -50,7 +83,7 @@ Same file as skeleton: raw source costs more tokens.
|
|
|
50
83
|
|
|
51
84
|
## MCP
|
|
52
85
|
|
|
53
|
-
Same tools
|
|
86
|
+
Same three tools over MCP: search, skeleton (paths and line ranges), and impact—so **AI agents** can **narrow context** before full-file reads. Point the server at a checkout whose root contains `.coderay.toml` (`CODERAY_REPO_ROOT` below). For tool choice versus a plain read, see [AGENTS.md](AGENTS.md).
|
|
54
87
|
|
|
55
88
|
```bash
|
|
56
89
|
which coderay-mcp
|
|
@@ -71,37 +104,12 @@ which coderay-mcp
|
|
|
71
104
|
`CODERAY_REPO_ROOT` must be the directory that contains `.coderay.toml`. More detail: [`mcp_server/README.md`](src/coderay/mcp_server/README.md).
|
|
72
105
|
|
|
73
106
|
|
|
74
|
-
## Why this matters
|
|
75
|
-
|
|
76
|
-
Noisy context windows make models confident about the wrong code. CodeRay front-loads **intent** (search), **shape** (skeleton), and **dependencies** (impact) so the expensive read happens after you have a map—not instead of ever reading implementation when control flow matters.
|
|
77
|
-
|
|
78
|
-
### Token savings (tiktoken, `cl100k_base`)
|
|
79
|
-
|
|
80
|
-
Measured on this repo after a full index.
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
| File | Lines | Full read | Skeleton | Savings |
|
|
84
|
-
| ---------------------------------- | ----- | --------- | -------- | -------- |
|
|
85
|
-
| `src/coderay/pipeline/indexer.py` | 400 | 3,024 | 757 | **4.0x** |
|
|
86
|
-
| `src/coderay/graph/code_graph.py` | 500 | 4,261 | 1,022 | **4.2x** |
|
|
87
|
-
| `src/coderay/mcp_server/server.py` | 316 | 2,268 | 1,313 | **1.7x** |
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
| Query | Search hit tokens | vs full `indexer.py` read |
|
|
92
|
-
| ------------------------------------ | ----------------- | ------------------------- |
|
|
93
|
-
| "how are files re-indexed on change" | 479 | **~6x cheaper** |
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
*Not guarantees — model, chunks, and files affect counts.*
|
|
97
|
-
|
|
98
|
-
|
|
99
107
|
## Features
|
|
100
108
|
|
|
101
109
|
- **Languages** — Python, JavaScript, and TypeScript — [`parsing/README.md`](src/coderay/parsing/README.md)
|
|
102
110
|
- **Multi-repo / monorepo** — roots, aliases, optional `include` subtrees — [`core/README.md`](src/coderay/core/README.md)
|
|
103
111
|
- **Hybrid search** — vector + BM25 (RRF), optional boosting — [`retrieval/README.md`](src/coderay/retrieval/README.md)
|
|
104
|
-
- **Embeddings** — fastembed (CPU) or MLX on Apple Silicon — [`embedding/README.md`](src/coderay/embedding/README.md)
|
|
112
|
+
- **Embeddings** — fastembed (CPU) or MLX on Apple Silicon. Defaults to MiniLM L6 for speed; configure BGE in `.coderay.toml` for stronger (heavier) vectors — [`embedding/README.md`](src/coderay/embedding/README.md)
|
|
105
113
|
- **Watch** — incremental re-index; `.coderay.toml` is the source of truth for what’s indexed
|
|
106
114
|
|
|
107
115
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "coderay"
|
|
7
|
-
version = "1.1.1"
|
|
7
|
+
version = "1.2.1"
|
|
8
8
|
description = "X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -53,6 +53,7 @@ dev = [
|
|
|
53
53
|
"pytest-cov>=4.0",
|
|
54
54
|
"ruff>=0.8.0",
|
|
55
55
|
"mypy>=1.0.0",
|
|
56
|
+
"tiktoken>=0.5.0",
|
|
56
57
|
]
|
|
57
58
|
maintain = [
|
|
58
59
|
"pylance>=0.15.0",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.1"
|
|
@@ -340,7 +340,7 @@ def maintain(ctx: click.Context) -> None:
|
|
|
340
340
|
|
|
341
341
|
|
|
342
342
|
@cli.command()
|
|
343
|
-
@click.argument("file_path", type=
|
|
343
|
+
@click.argument("file_path", type=str)
|
|
344
344
|
@click.option(
|
|
345
345
|
"--include-imports",
|
|
346
346
|
is_flag=True,
|
|
@@ -353,17 +353,53 @@ def maintain(ctx: click.Context) -> None:
|
|
|
353
353
|
default=None,
|
|
354
354
|
help="Filter to a specific class or top-level function by name.",
|
|
355
355
|
)
|
|
356
|
+
@click.option(
|
|
357
|
+
"--lines",
|
|
358
|
+
"line_range",
|
|
359
|
+
default=None,
|
|
360
|
+
metavar="START-END",
|
|
361
|
+
help=(
|
|
362
|
+
"File line range (1-based inclusive); keep only symbols fully within this span."
|
|
363
|
+
" Do not combine with a :START-END suffix on FILE_PATH (same meaning)."
|
|
364
|
+
),
|
|
365
|
+
)
|
|
356
366
|
def skeleton(
|
|
357
|
-
file_path:
|
|
367
|
+
file_path: str,
|
|
358
368
|
include_imports: bool,
|
|
359
369
|
symbol: str | None,
|
|
370
|
+
line_range: str | None,
|
|
360
371
|
) -> None:
|
|
361
372
|
"""Print signatures without bodies (cheaper than reading the full file)."""
|
|
362
373
|
from coderay.skeleton.extractor import extract_skeleton
|
|
374
|
+
from coderay.skeleton.path_range import (
|
|
375
|
+
parse_file_line_range,
|
|
376
|
+
parse_skeleton_file_arg,
|
|
377
|
+
)
|
|
363
378
|
|
|
364
|
-
|
|
379
|
+
try:
|
|
380
|
+
path_str, rng_from_path = parse_skeleton_file_arg(file_path, parse_suffix=True)
|
|
381
|
+
except ValueError as e:
|
|
382
|
+
raise click.BadParameter(str(e)) from e
|
|
383
|
+
file_line_range = rng_from_path
|
|
384
|
+
if line_range:
|
|
385
|
+
if file_line_range is not None:
|
|
386
|
+
raise click.UsageError(
|
|
387
|
+
"Use either a path ending with :START-END or --lines, not both."
|
|
388
|
+
)
|
|
389
|
+
try:
|
|
390
|
+
file_line_range = parse_file_line_range(line_range)
|
|
391
|
+
except ValueError as e:
|
|
392
|
+
raise click.BadParameter(str(e), param_hint="--lines") from e
|
|
393
|
+
resolved = Path(path_str)
|
|
394
|
+
if not resolved.is_file():
|
|
395
|
+
raise click.BadParameter(f"not a file: {path_str}", param_hint="file_path")
|
|
396
|
+
content = resolved.read_text(encoding="utf-8", errors="replace")
|
|
365
397
|
out = extract_skeleton(
|
|
366
|
-
|
|
398
|
+
resolved,
|
|
399
|
+
content,
|
|
400
|
+
include_imports=include_imports,
|
|
401
|
+
symbol=symbol,
|
|
402
|
+
line_range=file_line_range,
|
|
367
403
|
)
|
|
368
404
|
click.echo(out)
|
|
369
405
|
|
|
@@ -38,13 +38,13 @@ backend = "auto"
|
|
|
38
38
|
|
|
39
39
|
[embedder.fastembed]
|
|
40
40
|
# Default embedder. Runs on CPU.
|
|
41
|
-
model_name = "
|
|
41
|
+
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
|
42
42
|
dimensions = 384
|
|
43
43
|
batch_size = 64
|
|
44
44
|
|
|
45
45
|
[embedder.mlx]
|
|
46
46
|
# Apple Silicon embedder (MLX/Metal; device depends on runtime).
|
|
47
|
-
model_name = "mlx-community/
|
|
47
|
+
model_name = "mlx-community/all-MiniLM-L6-v2-4bit"
|
|
48
48
|
dimensions = 384
|
|
49
49
|
batch_size = 256
|
|
50
50
|
|
|
@@ -27,25 +27,33 @@ Maps code chunks to dense vectors for storage and query.
|
|
|
27
27
|
Run `coderay build --full` after any change to `[embedder]` config. Vectors
|
|
28
28
|
from different models are not compatible.
|
|
29
29
|
|
|
30
|
-
##
|
|
30
|
+
## Defaults and trade-offs
|
|
31
31
|
|
|
32
|
-
The default
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
The default is **MiniLM L6** (`sentence-transformers/all-MiniLM-L6-v2` on CPU,
|
|
33
|
+
`mlx-community/all-MiniLM-L6-v2-4bit` on MLX): fast indexing and good enough
|
|
34
|
+
semantic search for most workflows. For **stronger embeddings** (often better
|
|
35
|
+
retrieval on code), switch to **BGE Small** — expect a heavier download and more
|
|
36
|
+
compute than MiniLM.
|
|
35
37
|
|
|
36
|
-
| Model | Backend | Size | Dimensions |
|
|
37
|
-
|
|
38
|
-
| `
|
|
39
|
-
| `
|
|
40
|
-
| `mlx-community/
|
|
41
|
-
| `mlx-community/
|
|
38
|
+
| Model | Backend | Size (approx.) | Dimensions | Notes |
|
|
39
|
+
|-------|---------|----------------|------------|-------|
|
|
40
|
+
| `sentence-transformers/all-MiniLM-L6-v2` | fastembed | ~90MB | 384 | **Default.** Fast; widely used. |
|
|
41
|
+
| `BAAI/bge-small-en-v1.5` | fastembed | ~67MB | 384 | Heavier quality focus; strong retrieval in this size class. |
|
|
42
|
+
| `mlx-community/all-MiniLM-L6-v2-bf16` | mlx | ~45MB | 384 | Higher-precision MiniLM on MLX; the shipped MLX default is the 4-bit variant. |
|
|
43
|
+
| `mlx-community/bge-small-en-v1.5-bf16` | mlx | ~25MB | 384 | BGE on MLX; better embeddings than MiniLM, more work per batch. |
|
|
44
|
+
| `mlx-community/bge-small-en-v1.5-4bit` | mlx | ~19MB | 384 | 4-bit BGE; smaller download, small quality delta vs bf16. |
|
|
45
|
+
| `mlx-community/all-MiniLM-L6-v2-4bit` | mlx | ~13MB | 384 | **Default** on Apple Silicon with `coderay[mlx]`. Smallest; fastest cold start; lower retrieval quality for code. |
|
|
42
46
|
|
|
43
|
-
To
|
|
47
|
+
To use BGE instead of the defaults, edit `.coderay.toml` and run `coderay build --full`:
|
|
44
48
|
|
|
45
49
|
```toml
|
|
46
|
-
|
|
50
|
+
[embedder.fastembed]
|
|
51
|
+
model_name = "BAAI/bge-small-en-v1.5"
|
|
52
|
+
dimensions = 384
|
|
53
|
+
batch_size = 64
|
|
54
|
+
|
|
47
55
|
[embedder.mlx]
|
|
48
|
-
model_name = "mlx-community/bge-small-en-v1.5-
|
|
56
|
+
model_name = "mlx-community/bge-small-en-v1.5-bf16"
|
|
49
57
|
dimensions = 384
|
|
50
58
|
batch_size = 256
|
|
51
59
|
```
|
|
@@ -37,8 +37,10 @@ mcp = FastMCP(
|
|
|
37
37
|
"\n"
|
|
38
38
|
"- semantic_search: search code by meaning. Best for "
|
|
39
39
|
"'how/where' questions. Use grep for exact symbol lookup.\n"
|
|
40
|
-
"- get_file_skeleton: signatures and docstrings only, no bodies
|
|
41
|
-
"
|
|
40
|
+
"- get_file_skeleton: signatures and docstrings only, no bodies; "
|
|
41
|
+
"absolute path line per symbol (with optional symbol line range suffix) "
|
|
42
|
+
"for filepath:START-END style refs. "
|
|
43
|
+
"Optional file line range narrows output. "
|
|
42
44
|
"Works without the index.\n"
|
|
43
45
|
"- get_impact_radius: reverse dependency traversal from the code "
|
|
44
46
|
"graph. Shows callers/dependents of a function or class. "
|
|
@@ -177,10 +179,11 @@ async def semantic_search(
|
|
|
177
179
|
@mcp.tool(
|
|
178
180
|
description=(
|
|
179
181
|
"Extracts class/function signatures and docstrings from a file — "
|
|
180
|
-
"no bodies.
|
|
181
|
-
"
|
|
182
|
-
"
|
|
183
|
-
"
|
|
182
|
+
"no bodies. Each symbol is preceded by the absolute file path and "
|
|
183
|
+
"symbol line range suffix (1-based inclusive) for filepath:START-END refs. "
|
|
184
|
+
"Optional file line range via path suffix :START-END or file_line_range "
|
|
185
|
+
"(same meaning; do not pass both). Narrows to declarations fully within that "
|
|
186
|
+
"range. Does not require the index."
|
|
184
187
|
),
|
|
185
188
|
annotations=READ_ONLY_ANNOTATIONS,
|
|
186
189
|
tags={"analysis"},
|
|
@@ -188,7 +191,12 @@ async def semantic_search(
|
|
|
188
191
|
async def get_file_skeleton(
|
|
189
192
|
file_path: Annotated[
|
|
190
193
|
str,
|
|
191
|
-
Field(
|
|
194
|
+
Field(
|
|
195
|
+
description=(
|
|
196
|
+
"Path to the file. Optional :START-END suffix (same as file_line_range)"
|
|
197
|
+
"; do not combine with file_line_range."
|
|
198
|
+
),
|
|
199
|
+
),
|
|
192
200
|
],
|
|
193
201
|
include_imports: Annotated[
|
|
194
202
|
bool,
|
|
@@ -206,18 +214,46 @@ async def get_file_skeleton(
|
|
|
206
214
|
),
|
|
207
215
|
),
|
|
208
216
|
] = None,
|
|
217
|
+
file_line_range: Annotated[
|
|
218
|
+
str | None,
|
|
219
|
+
Field(
|
|
220
|
+
description=(
|
|
221
|
+
"Optional file line range as START-END (1-based inclusive). "
|
|
222
|
+
"Do not combine with a :START-END suffix on file_path."
|
|
223
|
+
),
|
|
224
|
+
),
|
|
225
|
+
] = None,
|
|
209
226
|
) -> str:
|
|
210
227
|
"""Get file API surface (signatures, no bodies)."""
|
|
211
228
|
from coderay.skeleton.extractor import extract_skeleton
|
|
229
|
+
from coderay.skeleton.path_range import (
|
|
230
|
+
parse_file_line_range,
|
|
231
|
+
parse_skeleton_file_arg,
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
try:
|
|
235
|
+
path_str, rng_suffix = parse_skeleton_file_arg(file_path, parse_suffix=True)
|
|
236
|
+
except ValueError as e:
|
|
237
|
+
raise ValueError(str(e)) from e
|
|
238
|
+
line_range: tuple[int, int] | None = rng_suffix
|
|
239
|
+
if file_line_range:
|
|
240
|
+
if line_range is not None:
|
|
241
|
+
raise ValueError(
|
|
242
|
+
"Use either file_path :START-END suffix or file_line_range, not both."
|
|
243
|
+
)
|
|
244
|
+
try:
|
|
245
|
+
line_range = parse_file_line_range(file_line_range)
|
|
246
|
+
except ValueError as e:
|
|
247
|
+
raise ValueError(str(e)) from e
|
|
212
248
|
|
|
213
249
|
workspace_root = _resolve_index_dir().parent.resolve()
|
|
214
|
-
candidate = (workspace_root /
|
|
250
|
+
candidate = (workspace_root / path_str).resolve()
|
|
215
251
|
try:
|
|
216
252
|
candidate.relative_to(workspace_root)
|
|
217
253
|
except ValueError:
|
|
218
254
|
raise FileNotFoundError(f"File not found: {file_path}")
|
|
219
255
|
if not candidate.is_file():
|
|
220
|
-
raise FileNotFoundError(f"File not found: {
|
|
256
|
+
raise FileNotFoundError(f"File not found: {path_str}")
|
|
221
257
|
content = await asyncio.to_thread(
|
|
222
258
|
candidate.read_text, encoding="utf-8", errors="replace"
|
|
223
259
|
)
|
|
@@ -227,6 +263,7 @@ async def get_file_skeleton(
|
|
|
227
263
|
content,
|
|
228
264
|
include_imports=include_imports,
|
|
229
265
|
symbol=symbol,
|
|
266
|
+
line_range=line_range,
|
|
230
267
|
)
|
|
231
268
|
|
|
232
269
|
|
|
@@ -37,8 +37,11 @@ class GraphConfig:
|
|
|
37
37
|
|
|
38
38
|
@dataclass
|
|
39
39
|
class SkeletonConfig:
|
|
40
|
-
"""Skeleton
|
|
40
|
+
"""Skeleton: declaration types (chunker-style), docstrings, module pass-through."""
|
|
41
41
|
|
|
42
|
+
# Node types that emit as declarations. JS/TS omits export_statement (unwrap) and
|
|
43
|
+
# lexical_declaration (top_level_expr_types). See chunk_types in this file.
|
|
44
|
+
symbol_types: tuple[str, ...]
|
|
42
45
|
docstring_expr_type: str = "expression_statement"
|
|
43
46
|
top_level_expr_types: tuple[str, ...] = ("expression_statement",)
|
|
44
47
|
body_block_types: tuple[str, ...] = ("block", "statement_block")
|
|
@@ -102,8 +105,16 @@ _PYTHON_CST_DISPATCH = CstDispatchConfig(
|
|
|
102
105
|
)
|
|
103
106
|
|
|
104
107
|
|
|
108
|
+
_PY_CHUNK_TYPES: tuple[str, ...] = (
|
|
109
|
+
"function_definition",
|
|
110
|
+
"class_definition",
|
|
111
|
+
"decorated_definition",
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
|
|
105
115
|
def _python_skeleton() -> SkeletonConfig:
|
|
106
116
|
return SkeletonConfig(
|
|
117
|
+
symbol_types=_PY_CHUNK_TYPES,
|
|
107
118
|
docstring_expr_type="expression_statement",
|
|
108
119
|
top_level_expr_types=("expression_statement",),
|
|
109
120
|
body_block_types=("block",),
|
|
@@ -111,13 +122,7 @@ def _python_skeleton() -> SkeletonConfig:
|
|
|
111
122
|
|
|
112
123
|
|
|
113
124
|
def _python_chunker() -> ChunkerConfig:
|
|
114
|
-
return ChunkerConfig(
|
|
115
|
-
chunk_types=(
|
|
116
|
-
"function_definition",
|
|
117
|
-
"class_definition",
|
|
118
|
-
"decorated_definition",
|
|
119
|
-
),
|
|
120
|
-
)
|
|
125
|
+
return ChunkerConfig(chunk_types=_PY_CHUNK_TYPES)
|
|
121
126
|
|
|
122
127
|
|
|
123
128
|
_PYTHON_GRAPH = GraphConfig(
|
|
@@ -172,8 +177,33 @@ _JS_TS_GRAPH = GraphConfig(
|
|
|
172
177
|
)
|
|
173
178
|
|
|
174
179
|
|
|
180
|
+
# Chunker includes export_statement and lexical_declaration; skeleton unwraps exports
|
|
181
|
+
# and treats top-level lexical_declaration via top_level_expr_types.
|
|
182
|
+
_JS_TS_CHUNK_TYPES: tuple[str, ...] = (
|
|
183
|
+
"function_declaration",
|
|
184
|
+
"class_declaration",
|
|
185
|
+
"method_definition",
|
|
186
|
+
"arrow_function",
|
|
187
|
+
"export_statement",
|
|
188
|
+
"lexical_declaration",
|
|
189
|
+
"interface_declaration",
|
|
190
|
+
"type_alias_declaration",
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
_JS_TS_SKELETON_SYMBOL_TYPES: tuple[str, ...] = (
|
|
194
|
+
"function_declaration",
|
|
195
|
+
"class_declaration",
|
|
196
|
+
"method_definition",
|
|
197
|
+
"arrow_function",
|
|
198
|
+
"interface_declaration",
|
|
199
|
+
"type_alias_declaration",
|
|
200
|
+
"type_declaration",
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
|
|
175
204
|
def _js_ts_skeleton() -> SkeletonConfig:
|
|
176
205
|
return SkeletonConfig(
|
|
206
|
+
symbol_types=_JS_TS_SKELETON_SYMBOL_TYPES,
|
|
177
207
|
docstring_expr_type="expression_statement",
|
|
178
208
|
top_level_expr_types=("expression_statement", "lexical_declaration"),
|
|
179
209
|
body_block_types=("statement_block",),
|
|
@@ -181,18 +211,7 @@ def _js_ts_skeleton() -> SkeletonConfig:
|
|
|
181
211
|
|
|
182
212
|
|
|
183
213
|
def _js_ts_chunker() -> ChunkerConfig:
|
|
184
|
-
return ChunkerConfig(
|
|
185
|
-
chunk_types=(
|
|
186
|
-
"function_declaration",
|
|
187
|
-
"class_declaration",
|
|
188
|
-
"method_definition",
|
|
189
|
-
"arrow_function",
|
|
190
|
-
"export_statement",
|
|
191
|
-
"lexical_declaration",
|
|
192
|
-
"interface_declaration",
|
|
193
|
-
"type_alias_declaration",
|
|
194
|
-
),
|
|
195
|
-
)
|
|
214
|
+
return ChunkerConfig(chunk_types=_JS_TS_CHUNK_TYPES)
|
|
196
215
|
|
|
197
216
|
|
|
198
217
|
@dataclass
|
|
@@ -6,8 +6,9 @@ demand (not stored in the index). Works without a built index.
|
|
|
6
6
|
|
|
7
7
|
## How it works
|
|
8
8
|
|
|
9
|
-
`extractor.py` uses tree-sitter to parse the file, then walks the CST
|
|
10
|
-
|
|
9
|
+
`extractor.py` uses tree-sitter to parse the file, then walks the CST; declaration
|
|
10
|
+
nodes come from `LanguageConfig.skeleton.symbol_types` (per language, like chunk
|
|
11
|
+
types in `parsing/languages.py`), with shape from `cst` function/class/decorator sets.
|
|
11
12
|
Function and method bodies are replaced with `...`. Class headers are kept as
|
|
12
13
|
context even when filtering to a specific symbol.
|
|
13
14
|
|