pi-read-map 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +9 -6
- package/CHANGELOG.md +49 -0
- package/README.md +7 -1
- package/package.json +2 -1
- package/src/language-detect.ts +6 -0
- package/src/mapper.ts +4 -0
- package/src/mappers/clojure.ts +613 -0
- package/src/mappers/ctags.ts +31 -4
package/AGENTS.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AGENTS.md
|
|
2
2
|
|
|
3
|
-
> Last updated: 2026-02-
|
|
3
|
+
> Last updated: 2026-02-14
|
|
4
4
|
|
|
5
5
|
Pi extension that augments the built-in `read` tool with structural file maps for large files (>2,000 lines or >50 KB). Intercepts `read` calls, generates symbol maps via language-specific parsers, and sends them as separate `file-map` messages after the tool result.
|
|
6
6
|
|
|
7
|
-
## Commands (verified 2026-02-
|
|
7
|
+
## Commands (verified 2026-02-14)
|
|
8
8
|
|
|
9
9
|
| Command | Purpose | ~Time |
|
|
10
10
|
|---------|---------|-------|
|
|
@@ -30,10 +30,11 @@ src/
|
|
|
30
30
|
├── types.ts → FileMap, FileSymbol, MapOptions, FileMapMessageDetails
|
|
31
31
|
├── enums.ts → SymbolKind (21 kinds), DetailLevel (5 levels)
|
|
32
32
|
├── constants.ts → THRESHOLDS: lines, bytes, budget tiers
|
|
33
|
-
└── mappers/ → One mapper per language (
|
|
33
|
+
└── mappers/ → One mapper per language (17 total)
|
|
34
34
|
├── typescript.ts → ts-morph (handles TS + JS)
|
|
35
35
|
├── rust.ts → tree-sitter-rust
|
|
36
36
|
├── cpp.ts → tree-sitter-cpp (C++ and .h files)
|
|
37
|
+
├── clojure.ts → tree-sitter-clojure (.clj, .cljs, .cljc, .edn)
|
|
37
38
|
├── python.ts → subprocess: scripts/python_outline.py
|
|
38
39
|
├── go.ts → subprocess: scripts/go_outline.go
|
|
39
40
|
├── json.ts → subprocess: jq
|
|
@@ -58,7 +59,7 @@ tests/
|
|
|
58
59
|
├── e2e/ → Real pi sessions via tmux (vitest.e2e.config.ts)
|
|
59
60
|
├── fixtures/ → Sample files per language
|
|
60
61
|
├── benchmarks/ → Mapper performance benchmarks
|
|
61
|
-
└── helpers/ → Test utilities (pi-runner, constants,
|
|
62
|
+
└── helpers/ → Test utilities (pi-runner, constants, tree-sitter)
|
|
62
63
|
|
|
63
64
|
docs/
|
|
64
65
|
├── plans/ → Implementation plans (phased)
|
|
@@ -86,6 +87,7 @@ Maps are cached in-memory by `(filePath, mtime)`. Delivered as custom `file-map`
|
|
|
86
87
|
|-----|-----------|--------------|
|
|
87
88
|
| New mapper | `src/mappers/csv.ts` | Simple, clean, regex-free in-process parsing |
|
|
88
89
|
| Complex mapper | `src/mappers/typescript.ts` | ts-morph AST walk, nested symbols, modifiers |
|
|
90
|
+
| Tree-sitter mapper | `src/mappers/clojure.ts` | tree-sitter AST walk, reader conditionals, platform modifiers |
|
|
89
91
|
| Subprocess mapper | `src/mappers/python.ts` | Calls external script, parses JSON output |
|
|
90
92
|
| Unit test | `tests/unit/mappers/csv.test.ts` | Fixture-based, edge cases, null returns |
|
|
91
93
|
| Integration test | `tests/integration/budget-enforcement.test.ts` | Tests progressive detail reduction |
|
|
@@ -125,8 +127,9 @@ Maps are cached in-memory by `(filePath, mtime)`. Delivered as custom `file-map`
|
|
|
125
127
|
|
|
126
128
|
- `oxlint` installed as devDependency; `npm run lint` exits cleanly (0 errors, 0 warnings)
|
|
127
129
|
- `tree-sitter` pinned to 0.22.4 due to peer dependency conflicts (see `docs/todo/upgrade-tree-sitter-0.26.md`)
|
|
130
|
+
- `tree-sitter-clojure` pinned to commit SHA from `github:ghoseb/tree-sitter-clojure` (third-party fork)
|
|
128
131
|
- Go outline script auto-compiles on first use; compiled binary checked in at `scripts/go_outline`
|
|
129
|
-
- Phase 1-
|
|
132
|
+
- Phase 1-5 of implementation plan complete; remaining TODOs in `docs/todo/`
|
|
130
133
|
|
|
131
134
|
| Docstrings / JSDoc | `FileSymbol.docstring?: string` | First-line summary of doc comments |
|
|
132
135
|
| Exported flag | `FileSymbol.isExported?: boolean` | Whether symbol is part of public API |
|
|
@@ -151,5 +154,5 @@ Maps are cached in-memory by `(filePath, mtime)`. Delivered as custom `file-map`
|
|
|
151
154
|
- **Language:** TypeScript (strict, `noUncheckedIndexedAccess`)
|
|
152
155
|
- **Testing:** Vitest (unit/integration: 10s timeout, e2e: 60s timeout)
|
|
153
156
|
- **Linting:** oxlint + oxfmt
|
|
154
|
-
- **Parsing:** ts-morph, tree-sitter, regex, subprocess (Python/Go/jq)
|
|
157
|
+
- **Parsing:** ts-morph, tree-sitter (rust, cpp, clojure), regex, subprocess (Python/Go/jq)
|
|
155
158
|
- **Framework:** pi extension API (`@mariozechner/pi-coding-agent`)
|
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [1.2.0] - 2026-02-14
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- **Clojure mapper** — tree-sitter-based parser for `.clj`, `.cljs`, `.cljc`, and `.edn` files. Extracts `ns`, `defn`, `defn-`, `def`, `defonce`, `defmacro`, `defmulti`, `defmethod`, `defprotocol`, `defrecord`, and `deftype` forms with docstrings, signatures, modifiers, and protocol method children. Supports reader conditionals (`#?`) with per-platform annotations. Contributed by [Baishampayan Ghose](https://github.com/ghoseb). ([#2](https://github.com/Whamp/pi-read-map/pull/2))
|
|
10
|
+
- Clojure demo asset (`clojure/core.clj` from the official Clojure repo)
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- Tree-sitter tests use `describe.runIf` for conditional execution
|
|
15
|
+
|
|
16
|
+
## [1.1.0] - 2026-02-10
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- Docstring extraction (`FileSymbol.docstring`) across all mappers
|
|
21
|
+
- Export flag (`FileSymbol.isExported`) across all mappers
|
|
22
|
+
- Required imports (`FileMap.imports`) across all mappers
|
|
23
|
+
- Skipped read recovery — detects reads cancelled by the steering queue and re-issues them
|
|
24
|
+
- JSONL session-aware maps for pi session files
|
|
25
|
+
- Directory read handling with EISDIR error and `ls` fallback
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
|
|
29
|
+
- Symbol duplication in file map output
|
|
30
|
+
- oxlint warnings and errors resolved across codebase
|
|
31
|
+
|
|
32
|
+
### Changed
|
|
33
|
+
|
|
34
|
+
- Test helpers refactored; `models.ts` renamed to `constants.ts`
|
|
35
|
+
|
|
36
|
+
## [1.0.0] - 2026-02-09
|
|
37
|
+
|
|
38
|
+
Initial release.
|
|
39
|
+
|
|
40
|
+
### Added
|
|
41
|
+
|
|
42
|
+
- Structural file maps for large files (>2,000 lines or >50 KB)
|
|
43
|
+
- 14 language mappers: TypeScript, JavaScript, Python, Go, Rust, C, C++, SQL, JSON, JSONL, YAML, TOML, CSV, Markdown
|
|
44
|
+
- Budget-aware formatting with progressive detail reduction (10 KB full → 100 KB truncated)
|
|
45
|
+
- In-memory caching by file path and modification time
|
|
46
|
+
- Fallback chain: language mapper → universal-ctags → grep
|
|
47
|
+
- Custom `file-map` messages delivered after `tool_result` events
|
|
48
|
+
- E2E test infrastructure via tmux
|
|
49
|
+
- Demo assets from 10 major open-source projects
|
package/README.md
CHANGED
|
@@ -23,7 +23,7 @@ https://github.com/user-attachments/assets/4408f37b-b669-453f-a588-336a5332ae90
|
|
|
23
23
|
## What It Does
|
|
24
24
|
|
|
25
25
|
- **Generates structural maps** showing symbols, classes, functions, and their exact line ranges
|
|
26
|
-
- **Supports
|
|
26
|
+
- **Supports 17 languages** through specialized parsers: TypeScript, JavaScript, Python, Go, Rust, C, C++, Clojure, ClojureScript, SQL, JSON, JSONL, YAML, TOML, CSV, Markdown, EDN
|
|
27
27
|
- **Extracts structural outlines** — functions, classes, and their line ranges — typically under 1% of file size
|
|
28
28
|
- **Enforces budgets** through progressive detail reduction (10 KB full → 15 KB compact → 20 KB minimal → 50 KB outline → 100 KB hard cap)
|
|
29
29
|
- **Caches maps** in memory by file path and modification time for instant re-reads
|
|
@@ -130,6 +130,7 @@ src/
|
|
|
130
130
|
├── go.ts # Go AST via subprocess
|
|
131
131
|
├── rust.ts # tree-sitter
|
|
132
132
|
├── cpp.ts # tree-sitter for C/C++
|
|
133
|
+
├── clojure.ts # tree-sitter for Clojure/ClojureScript/EDN
|
|
133
134
|
├── c.ts # Regex patterns
|
|
134
135
|
├── sql.ts # Regex
|
|
135
136
|
├── json.ts # jq subprocess
|
|
@@ -173,6 +174,7 @@ The extension intercepts `read` calls and decides:
|
|
|
173
174
|
- `tree-sitter` - Parser framework
|
|
174
175
|
- `tree-sitter-cpp` - C/C++ parsing
|
|
175
176
|
- `tree-sitter-rust` - Rust parsing
|
|
177
|
+
- `tree-sitter-clojure` - Clojure parsing
|
|
176
178
|
|
|
177
179
|
**System tools (optional):**
|
|
178
180
|
- `python3` - Python mapper
|
|
@@ -184,6 +186,10 @@ The extension intercepts `read` calls and decides:
|
|
|
184
186
|
|
|
185
187
|
This project was inspired by and built upon the foundation of [codemap](https://github.com/kcosr/codemap) by [kcosr](https://github.com/kcosr). Check out the original project for the ideas that made this possible.
|
|
186
188
|
|
|
189
|
+
### Contributors
|
|
190
|
+
|
|
191
|
+
- [Baishampayan Ghose](https://github.com/ghoseb) — Clojure tree-sitter mapper and [tree-sitter-clojure](https://github.com/ghoseb/tree-sitter-clojure) grammar
|
|
192
|
+
|
|
187
193
|
## License
|
|
188
194
|
|
|
189
195
|
MIT
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-read-map",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Pi extension that adds structural file maps for large files",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"pi": {
|
|
@@ -44,6 +44,7 @@
|
|
|
44
44
|
"license": "MIT",
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"tree-sitter": "0.22.4",
|
|
47
|
+
"tree-sitter-clojure": "github:ghoseb/tree-sitter-clojure#78928e6",
|
|
47
48
|
"tree-sitter-cpp": "0.23.4",
|
|
48
49
|
"tree-sitter-rust": "0.23.3",
|
|
49
50
|
"ts-morph": "27.0.2"
|
package/src/language-detect.ts
CHANGED
|
@@ -38,6 +38,12 @@ const EXTENSION_MAP: Record<string, LanguageInfo> = {
|
|
|
38
38
|
".hpp": { id: "cpp", name: "C++" },
|
|
39
39
|
".hxx": { id: "cpp", name: "C++" },
|
|
40
40
|
|
|
41
|
+
// Clojure
|
|
42
|
+
".clj": { id: "clojure", name: "Clojure" },
|
|
43
|
+
".cljs": { id: "clojure", name: "ClojureScript" },
|
|
44
|
+
".cljc": { id: "clojure", name: "Clojure" },
|
|
45
|
+
".edn": { id: "clojure", name: "EDN" },
|
|
46
|
+
|
|
41
47
|
// SQL
|
|
42
48
|
".sql": { id: "sql", name: "SQL" },
|
|
43
49
|
|
package/src/mapper.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { FileMap, MapOptions } from "./types.js";
|
|
|
3
3
|
import { THRESHOLDS } from "./constants.js";
|
|
4
4
|
import { detectLanguage } from "./language-detect.js";
|
|
5
5
|
import { cMapper } from "./mappers/c.js";
|
|
6
|
+
import { clojureMapper } from "./mappers/clojure.js";
|
|
6
7
|
import { cppMapper } from "./mappers/cpp.js";
|
|
7
8
|
import { csvMapper } from "./mappers/csv.js";
|
|
8
9
|
import { ctagsMapper } from "./mappers/ctags.js";
|
|
@@ -57,6 +58,9 @@ const MAPPERS: Record<string, MapperFn> = {
|
|
|
57
58
|
yaml: yamlMapper,
|
|
58
59
|
toml: tomlMapper,
|
|
59
60
|
csv: csvMapper,
|
|
61
|
+
|
|
62
|
+
// Phase 5: Clojure tree-sitter
|
|
63
|
+
clojure: clojureMapper,
|
|
60
64
|
};
|
|
61
65
|
|
|
62
66
|
/**
|
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
import { readFile, stat } from "node:fs/promises";
|
|
2
|
+
/**
|
|
3
|
+
* Clojure mapper using tree-sitter for AST extraction.
|
|
4
|
+
*
|
|
5
|
+
* The tree-sitter-clojure grammar parses at the S-expression level:
|
|
6
|
+
* all forms are `list_lit` nodes. We identify def forms by matching
|
|
7
|
+
* the first `sym_lit` child against known Clojure special forms.
|
|
8
|
+
*/
|
|
9
|
+
import { createRequire } from "node:module";
|
|
10
|
+
|
|
11
|
+
import type { FileMap, FileSymbol } from "../types.js";
|
|
12
|
+
|
|
13
|
+
import { DetailLevel, SymbolKind } from "../enums.js";
|
|
14
|
+
|
|
15
|
+
type SyntaxNode = import("tree-sitter").SyntaxNode;
|
|
16
|
+
|
|
17
|
+
/**
 * Lookup table of the defining forms this mapper recognizes, keyed by the
 * form's head symbol (e.g. `defn`).
 *
 * - `kind`        — the SymbolKind reported for the resulting symbol.
 * - `isPrivate`   — set for forms that are private by definition (`defn-`).
 * - `hasChildren` — set for forms whose body is scanned for nested method
 *                   symbols (currently only `defprotocol`).
 */
const DEF_FORMS: Record<
  string,
  { kind: SymbolKind; isPrivate?: boolean; hasChildren?: boolean }
> = {
  // Value-holding vars
  "def": { kind: SymbolKind.Variable },
  "defonce": { kind: SymbolKind.Variable },

  // Function-like forms
  "defn": { kind: SymbolKind.Function },
  "defn-": { kind: SymbolKind.Function, isPrivate: true },
  "defmacro": { kind: SymbolKind.Function },
  "defmulti": { kind: SymbolKind.Function },
  "defmethod": { kind: SymbolKind.Method },

  // Type-like forms
  "defprotocol": { kind: SymbolKind.Interface, hasChildren: true },
  "defrecord": { kind: SymbolKind.Class },
  "deftype": { kind: SymbolKind.Class },
};
|
|
35
|
+
|
|
36
|
+
// Lazy-loaded parser
|
|
37
|
+
let parser: import("tree-sitter") | null = null;
|
|
38
|
+
let parserInitialized = false;
|
|
39
|
+
|
|
40
|
+
/**
 * Give `SyntaxNode.prototype.type` a no-op setter when it is exposed as a
 * getter-only accessor.
 *
 * NOTE(review): presumably this keeps later assignments to `type` from
 * throwing in strict mode — confirm against the tree-sitter bindings.
 *
 * Nothing is changed when:
 * - the constructor has no `SyntaxNode.prototype`,
 * - the prototype has no own `type` property,
 * - the property already has a setter,
 * - the property is a data property (mixing `value`/`writable` with `set`
 *   in one descriptor makes `Object.defineProperty` throw), or
 * - the property is non-configurable (redefining it would throw).
 *
 * @param parserCtor The value exported by the `tree-sitter` module.
 */
function ensureWritableTypeProperty(parserCtor: unknown): void {
  const proto = (parserCtor as { SyntaxNode?: { prototype?: object } })
    .SyntaxNode?.prototype;
  if (!proto) {
    return;
  }

  const desc = Object.getOwnPropertyDescriptor(proto, "type");
  if (!desc || desc.set) {
    return;
  }

  // Only a configurable accessor can safely be extended with a setter.
  const isDataDescriptor = "value" in desc || "writable" in desc;
  if (isDataDescriptor || !desc.configurable) {
    return;
  }

  Object.defineProperty(proto, "type", { ...desc, set: () => {} });
}
|
|
53
|
+
|
|
54
|
+
/**
 * Return the shared Clojure parser, creating it on first use.
 *
 * Initialization is attempted exactly once; the outcome (a ready parser or
 * `null`) is cached in the module-level `parser` variable and returned on
 * every later call.
 *
 * `null` is returned when running under Bun (the parser is never loaded
 * there — NOTE(review): presumably because the native bindings are not
 * usable under Bun; confirm) or when loading `tree-sitter` /
 * `tree-sitter-clojure` or configuring the parser throws.
 *
 * @returns A parser with the Clojure language set, or `null` if unavailable.
 */
function getParser(): import("tree-sitter") | null {
  if (parserInitialized) {
    return parser;
  }
  parserInitialized = true;

  if (typeof (globalThis as { Bun?: unknown }).Bun !== "undefined") {
    return null;
  }

  try {
    const require = createRequire(import.meta.url);
    const ParserCtor = require("tree-sitter") as typeof import("tree-sitter");
    const Clojure =
      require("tree-sitter-clojure") as import("tree-sitter").Language;
    ensureWritableTypeProperty(ParserCtor);

    // Build into a local first: if setLanguage throws, the module-level
    // cache must not end up holding a parser without a language.
    const instance = new ParserCtor();
    instance.setLanguage(Clojure);
    parser = instance;
  } catch {
    parser = null;
  }

  return parser;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Return the slice of `source` covered by `node`, using the node's
 * `startIndex` (inclusive) and `endIndex` (exclusive).
 */
function getNodeText(node: SyntaxNode, source: string): string {
  const { startIndex, endIndex } = node;
  return source.slice(startIndex, endIndex);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Return the text of the first `sym_name` child of a `sym_lit` node.
 *
 * @returns The symbol name, or `null` when `node` is not a `sym_lit` or has
 *          no `sym_name` child.
 */
function getSymName(node: SyntaxNode, source: string): string | null {
  if (node.type === "sym_lit") {
    for (const child of node.namedChildren) {
      if (child.type === "sym_name") {
        return getNodeText(child, source);
      }
    }
  }
  return null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Check whether a node carries metadata marking it private.
 *
 * Two spellings are recognized on any `meta_lit` child of `node`:
 * - keyword shorthand: `^:private`
 * - map form: `^{:private true}` — the `:private` key must be paired with a
 *   value whose text is neither `false` nor `nil`.
 *
 * NOTE(review): the map form assumes the grammar exposes the metadata map as
 * a `map_lit` child of `meta_lit` with alternating key/value children —
 * confirm against the pinned tree-sitter-clojure fork.
 *
 * @param node Typically the `sym_lit` naming the var being defined.
 * @returns `true` when private metadata is present.
 */
function hasPrivateMeta(node: SyntaxNode): boolean {
  const isPrivateKey = (n: SyntaxNode): boolean =>
    n.type === "kwd_lit" && n.text === ":private";

  for (const meta of node.namedChildren) {
    if (meta.type !== "meta_lit") {
      continue;
    }
    for (const value of meta.namedChildren) {
      if (isPrivateKey(value)) {
        return true;
      }
      if (value.type !== "map_lit") {
        continue;
      }
      // Drop non-value children so the remainder alternates key, value, ...
      const entries = value.namedChildren.filter(
        (c) =>
          c.type !== "comment" && c.type !== "dis_expr" && c.type !== "meta_lit"
      );
      for (let i = 0; i + 1 < entries.length; i += 2) {
        const key = entries[i];
        const val = entries[i + 1];
        if (
          key &&
          val &&
          isPrivateKey(key) &&
          val.text !== "false" &&
          val.text !== "nil"
        ) {
          return true;
        }
      }
    }
  }
  return false;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Return the text of a `str_lit` node without its first and last character
 * (the surrounding double quotes). Escape sequences are left as written.
 */
function extractString(node: SyntaxNode, source: string): string {
  return getNodeText(node, source).slice(1, -1);
}
|
|
116
|
+
|
|
117
|
+
/**
 * Return the named children of `node` that are actual values, i.e. every
 * named child except `comment` and `dis_expr` nodes. Order is preserved.
 */
function getValueChildren(node: SyntaxNode): SyntaxNode[] {
  const kept: SyntaxNode[] = [];
  for (const child of node.namedChildren) {
    if (child.type === "comment" || child.type === "dis_expr") {
      continue;
    }
    kept.push(child);
  }
  return kept;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Collect method symbols from the body of a `defprotocol`.
 *
 * Each `list_lit` whose first value is a symbol becomes one method:
 * - signature: `(name [params])` using the first vector in the list, or
 *   `(name)` when there is none;
 * - docstring: the first string literal located after that vector (or after
 *   the name when no vector exists).
 *
 * @param children Values following the protocol name.
 * @param source   Full file text, used to slice node text.
 * @returns One `Method` symbol per recognized declaration, in source order.
 */
function extractProtocolMethods(
  children: SyntaxNode[],
  source: string
): FileSymbol[] {
  const methods: FileSymbol[] = [];

  for (const form of children) {
    if (form.type !== "list_lit") {
      continue;
    }

    const parts = getValueChildren(form);
    const head = parts[0];
    if (head === undefined || head.type !== "sym_lit") {
      continue;
    }

    const name = getSymName(head, source);
    if (!name) {
      continue;
    }

    const paramsNode = parts.find((part) => part.type === "vec_lit");
    const params = paramsNode ? getNodeText(paramsNode, source) : "";

    // A docstring only counts when it comes after the params (or the name).
    const anchorIdx = parts.indexOf(paramsNode ?? head);
    const docNode = parts.find(
      (part, idx) => part.type === "str_lit" && idx > anchorIdx
    );

    const method: FileSymbol = {
      name,
      kind: SymbolKind.Method,
      startLine: form.startPosition.row + 1,
      endLine: form.endPosition.row + 1,
      signature: params ? `(${name} ${params})` : `(${name})`,
    };
    if (docNode) {
      method.docstring = extractString(docNode, source);
    }
    methods.push(method);
  }

  return methods;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Build a `Method` symbol from a `defmethod` form.
 *
 * Expected shape: `(defmethod multi-name dispatch-val & fn-tail)`.
 * The symbol is named `"<multi-name> <dispatch-val>"`; when the form has no
 * dispatch value the placeholder `unknown` is used.
 *
 * Parameters are looked up only after the dispatch value, so a vector
 * dispatch value such as `[:a :b]` is not mistaken for the parameter list.
 * If the tail has no top-level vector, each list whose first value is a
 * vector is treated as one arity and the vectors are joined with spaces.
 *
 * @param node   The whole `defmethod` list node (provides the line range).
 * @param values The form's value children, starting with the `defmethod` symbol.
 * @param source Full file text, used to slice node text.
 * @returns The symbol, or `null` when the multimethod name is missing or is
 *          not a symbol.
 */
function extractDefmethod(
  node: SyntaxNode,
  values: SyntaxNode[],
  source: string
): FileSymbol | null {
  const [, nameNode, dispatchNode, ...tail] = values;
  if (!nameNode || nameNode.type !== "sym_lit") {
    return null;
  }
  const multiName = getSymName(nameNode, source);
  if (!multiName) {
    return null;
  }

  const dispatchVal = dispatchNode
    ? getNodeText(dispatchNode, source)
    : "unknown";

  let params = "";
  const paramsNode = tail.find((v) => v.type === "vec_lit");
  if (paramsNode) {
    params = getNodeText(paramsNode, source);
  } else {
    // Multi-arity tail: ([x] ...) ([x y] ...)
    const arityVecs: string[] = [];
    for (const v of tail) {
      if (v.type !== "list_lit") {
        continue;
      }
      const [first] = getValueChildren(v);
      if (first && first.type === "vec_lit") {
        arityVecs.push(getNodeText(first, source));
      }
    }
    params = arityVecs.join(" ");
  }

  // Avoid a dangling space before ")" when no parameters were found.
  const signature = params
    ? `(defmethod ${multiName} ${dispatchVal} ${params})`
    : `(defmethod ${multiName} ${dispatchVal})`;

  return {
    name: `${multiName} ${dispatchVal}`,
    kind: SymbolKind.Method,
    startLine: node.startPosition.row + 1,
    endLine: node.endPosition.row + 1,
    signature,
    isExported: true,
  };
}
|
|
203
|
+
|
|
204
|
+
/**
 * Find the docstring among the values that follow a definition's name.
 *
 * The candidate is the first `str_lit` in `restValues`. It is accepted only
 * if it comes before the first `vec_lit` and before the first `list_lit`.
 *
 * For value forms (`def`, `defonce`) the string must also be followed by at
 * least one more value; otherwise it is the var's value, not documentation:
 *   (def x "doc" 42) → "doc"
 *   (def x "val")    → undefined
 *
 * @param restValues  Values after the name.
 * @param source      Full file text, used to slice node text.
 * @param isValueForm `true` for `def` / `defonce`.
 * @returns The docstring without its quotes, or `undefined`.
 */
function extractDocstring(
  restValues: SyntaxNode[],
  source: string,
  isValueForm: boolean
): string | undefined {
  const strIdx = restValues.findIndex((v) => v.type === "str_lit");
  const docNode = strIdx === -1 ? undefined : restValues[strIdx];
  if (!docNode) {
    return undefined;
  }

  // Position of the first node of a given type, or Infinity when absent.
  const firstIndexOf = (type: string): number => {
    const idx = restValues.findIndex((v) => v.type === type);
    return idx === -1 ? Infinity : idx;
  };

  const bodyStart = Math.min(firstIndexOf("vec_lit"), firstIndexOf("list_lit"));
  if (strIdx >= bodyStart) {
    return undefined;
  }

  if (isValueForm) {
    let followedByValue = false;
    for (let i = strIdx + 1; i < restValues.length; i++) {
      const type = restValues[i]?.type;
      if (type !== "comment" && type !== "dis_expr") {
        followedByValue = true;
        break;
      }
    }
    if (!followedByValue) {
      return undefined;
    }
  }

  return extractString(docNode, source);
}
|
|
246
|
+
|
|
247
|
+
/**
 * Render a one-line signature for `defn`, `defn-` and `defmacro`.
 *
 * - Single arity: `(form name [params])`, using the first top-level vector.
 * - Multi arity:  `(form name [a] [a b] ...)`, using the first vector found
 *   in each top-level list that contains one.
 * - Otherwise:    `(form name)`.
 */
function buildFnSignature(
  formName: string,
  name: string,
  restValues: SyntaxNode[],
  source: string
): string {
  const head = `(${formName} ${name}`;

  const paramVec = restValues.find((v) => v.type === "vec_lit");
  if (paramVec) {
    return `${head} ${getNodeText(paramVec, source)})`;
  }

  // No top-level vector: gather one parameter vector per arity list.
  const arityVecs: string[] = [];
  for (const value of restValues) {
    if (value.type !== "list_lit") {
      continue;
    }
    const vec = getValueChildren(value).find((c) => c.type === "vec_lit");
    if (vec) {
      arityVecs.push(getNodeText(vec, source));
    }
  }

  return arityVecs.length > 0 ? `${head} ${arityVecs.join(" ")})` : `${head})`;
}
|
|
277
|
+
|
|
278
|
+
/**
 * Render a one-line signature for the non-function definition forms.
 *
 * - `defmulti`:             `(defmulti name <dispatch>)`, where dispatch is
 *                           the first value after the name that is not a
 *                           string literal; omitted when there is none.
 * - `defprotocol`:          `(defprotocol name)`.
 * - `defrecord`, `deftype`: `(form name [fields])` when a top-level vector
 *                           is present, else `(form name)`.
 * - anything else (`def`, `defonce`): `(form name)`.
 */
function buildDefSignature(
  formName: string,
  name: string,
  restValues: SyntaxNode[],
  source: string
): string {
  switch (formName) {
    case "defmulti": {
      const dispatchNode = restValues.find((v) => v.type !== "str_lit");
      const dispatch = dispatchNode ? getNodeText(dispatchNode, source) : "";
      if (dispatch) {
        return `(defmulti ${name} ${dispatch})`;
      }
      return `(defmulti ${name})`;
    }

    case "defprotocol":
      return `(defprotocol ${name})`;

    case "defrecord":
    case "deftype": {
      const fields = restValues.find((v) => v.type === "vec_lit");
      if (fields) {
        return `(${formName} ${name} ${getNodeText(fields, source)})`;
      }
      return `(${formName} ${name})`;
    }

    default:
      // def, defonce
      return `(${formName} ${name})`;
  }
}
|
|
307
|
+
|
|
308
|
+
const FN_FORMS = new Set(["defn", "defn-", "defmacro"]);
|
|
309
|
+
|
|
310
|
+
/**
 * Try to turn a `list_lit` node into a symbol for one of the forms listed
 * in `DEF_FORMS`.
 *
 * The form must have at least two values: the defining symbol and a name.
 * `defmethod` is delegated to `extractDefmethod`; every other form requires
 * the second value to be a symbol.
 *
 * The resulting symbol carries:
 * - `isExported`: `false` for `defn-` or names with private metadata;
 * - `modifiers`:  `"private"` and/or `"macro"` when applicable;
 * - `signature`, plus `docstring` when one is found;
 * - `children`:   protocol methods, for `defprotocol` only.
 *
 * @param node   Candidate form.
 * @param source Full file text, used to slice node text.
 * @returns The symbol, or `null` when the node is not a recognized def form.
 */
function extractDef(node: SyntaxNode, source: string): FileSymbol | null {
  if (node.type !== "list_lit") {
    return null;
  }

  const values = getValueChildren(node);
  if (values.length < 2) {
    return null;
  }

  const [formNode, nameNode] = values;
  if (!formNode || formNode.type !== "sym_lit") {
    return null;
  }
  const formName = getSymName(formNode, source);
  if (!formName) {
    return null;
  }

  // Own-property check: a bare index lookup would also match inherited
  // Object.prototype members such as "constructor" or "toString".
  const defInfo = Object.prototype.hasOwnProperty.call(DEF_FORMS, formName)
    ? DEF_FORMS[formName]
    : undefined;
  if (!defInfo) {
    return null;
  }

  // defmethod has unique structure
  if (formName === "defmethod") {
    return extractDefmethod(node, values, source);
  }

  if (!nameNode || nameNode.type !== "sym_lit") {
    return null;
  }
  const name = getSymName(nameNode, source);
  if (!name) {
    return null;
  }

  const isPrivate = defInfo.isPrivate === true || hasPrivateMeta(nameNode);
  const modifiers: string[] = [];
  if (isPrivate) {
    modifiers.push("private");
  }
  if (formName === "defmacro") {
    modifiers.push("macro");
  }

  const restValues = values.slice(2);
  const isValueForm = formName === "def" || formName === "defonce";
  const docstring = extractDocstring(restValues, source, isValueForm);

  const signature = FN_FORMS.has(formName)
    ? buildFnSignature(formName, name, restValues, source)
    : buildDefSignature(formName, name, restValues, source);

  // Only defprotocol has extractable method children.
  // defrecord/deftype inline protocol methods are not yet extracted.
  let children: FileSymbol[] | undefined;
  if (defInfo.hasChildren && formName === "defprotocol") {
    const extracted = extractProtocolMethods(restValues, source);
    children = extracted.length > 0 ? extracted : undefined;
  }

  const symbol: FileSymbol = {
    name,
    kind: defInfo.kind,
    startLine: node.startPosition.row + 1,
    endLine: node.endPosition.row + 1,
    isExported: !isPrivate,
    // Both signature builders always return a non-empty string.
    signature,
  };

  if (docstring) {
    symbol.docstring = docstring;
  }
  if (modifiers.length > 0) {
    symbol.modifiers = modifiers;
  }
  if (children) {
    symbol.children = children;
  }

  return symbol;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Read an `ns` form: namespace name, optional docstring and import specs.
 *
 * - The docstring is taken only from the third value, when that value is a
 *   string literal.
 * - Imports are gathered from `(:require ...)` and `(:import ...)` clauses;
 *   each spec is recorded as its raw source text.
 * - A reader conditional inside a clause is unwrapped: every form that
 *   follows a platform keyword is recorded as `"<form text> <:platform>"`.
 *
 * @param node   Candidate form.
 * @param source Full file text, used to slice node text.
 * @returns The parsed parts, or `null` when `node` is not an `ns` form with
 *          a symbol name.
 */
function extractNs(
  node: SyntaxNode,
  source: string
): { namespace: string; imports: string[]; docstring?: string } | null {
  if (node.type !== "list_lit") {
    return null;
  }

  const values = getValueChildren(node);
  if (values.length < 2) {
    return null;
  }

  const [formNode, nameNode, docNode] = values;
  if (
    !formNode ||
    formNode.type !== "sym_lit" ||
    getSymName(formNode, source) !== "ns"
  ) {
    return null;
  }
  if (!nameNode || nameNode.type !== "sym_lit") {
    return null;
  }
  const namespace = getSymName(nameNode, source);
  if (!namespace) {
    return null;
  }

  // Docstring at index 2
  const docstring =
    docNode?.type === "str_lit" ? extractString(docNode, source) : undefined;

  const imports: string[] = [];

  // Record one spec, expanding reader conditionals such as
  //   #?(:clj [clojure.java.io] :cljs [cljs.reader])
  // into one entry per platform branch, tagged with the platform keyword.
  const addSpec = (spec: SyntaxNode): void => {
    const isReaderConditional =
      spec.type === "read_cond_lit" || spec.type === "splicing_read_cond_lit";
    if (!isReaderConditional) {
      imports.push(getNodeText(spec, source));
      return;
    }

    let platform: string | undefined;
    for (const part of getValueChildren(spec)) {
      if (part.type === "kwd_lit") {
        platform = getNodeText(part, source);
        continue;
      }
      if (platform) {
        imports.push(`${getNodeText(part, source)} ${platform}`);
        platform = undefined;
      }
    }
  };

  for (const clause of values) {
    if (clause.type !== "list_lit") {
      continue;
    }
    const [keyword, ...specs] = getValueChildren(clause);
    if (!keyword || keyword.type !== "kwd_lit") {
      continue;
    }
    const clauseName = getNodeText(keyword, source);
    if (clauseName !== ":require" && clauseName !== ":import") {
      continue;
    }
    for (const spec of specs) {
      addSpec(spec);
    }
  }

  return { namespace, imports, docstring };
}
|
|
479
|
+
|
|
480
|
+
/**
 * Collect definitions from inside a reader conditional (`#?` or `#?@`).
 *
 * The children are walked in order as keyword/form pairs, e.g.
 *   #?(:clj (defn foo [x] ...) :cljs (defn foo [x] ...))
 *
 * Every `list_lit` that parses as a def form and follows a platform keyword
 * is returned with an extra `platform-<name>` modifier (keyword without its
 * leading colon). Definitions from all branches are kept, so the same name
 * may appear once per platform.
 *
 * @param node   The reader-conditional node.
 * @param source Full file text, used to slice node text.
 * @returns Platform-tagged symbols in source order.
 */
function extractReaderConditionalDefs(
  node: SyntaxNode,
  source: string
): FileSymbol[] {
  const tagged: FileSymbol[] = [];

  // Most recent platform keyword seen (e.g. ":clj").
  let platform: string | undefined;

  for (const child of getValueChildren(node)) {
    switch (child.type) {
      case "kwd_lit":
        platform = getNodeText(child, source);
        break;

      case "list_lit": {
        const def = extractDef(child, source);
        if (!def || !platform) {
          break;
        }
        const modifier = `platform-${platform.replace(/^:/, "")}`;
        def.modifiers = def.modifiers
          ? [...def.modifiers, modifier]
          : [modifier];
        tagged.push(def);
        break;
      }

      default:
        break;
    }
  }

  return tagged;
}
|
|
521
|
+
|
|
522
|
+
/**
 * Build a structural map of a Clojure source file with tree-sitter.
 *
 * Top-level forms are handled as follows:
 * - reader conditionals → platform-tagged definitions from every branch;
 * - `ns` forms          → a `Namespace` symbol, plus the file's imports;
 * - other lists         → a symbol when they are a recognized def form.
 *
 * @param filePath Path of the file to map.
 * @param signal   Optional abort signal, checked once after the file is read.
 * @returns The file map, or `null` when the parser is unavailable, the
 *          request was aborted, parsing failed, no symbols were found, or
 *          an unexpected error occurred (logged via `console.error` unless
 *          the request was aborted).
 */
export async function clojureMapper(
  filePath: string,
  signal?: AbortSignal
): Promise<FileMap | null> {
  try {
    const activeParser = getParser();
    if (!activeParser) {
      return null;
    }

    const { size: totalBytes } = await stat(filePath);
    const content = await readFile(filePath, "utf8");

    if (signal?.aborted) {
      return null;
    }

    let tree: import("tree-sitter").Tree;
    try {
      tree = activeParser.parse(content);
    } catch {
      return null;
    }

    const symbols: FileSymbol[] = [];
    const imports: string[] = [];

    for (const form of tree.rootNode.namedChildren) {
      const isReaderConditional =
        form.type === "read_cond_lit" || form.type === "splicing_read_cond_lit";

      // Reader conditionals: extract defs from all platform branches
      if (isReaderConditional) {
        symbols.push(...extractReaderConditionalDefs(form, content));
        continue;
      }

      if (form.type !== "list_lit") {
        continue;
      }

      const ns = extractNs(form, content);
      if (!ns) {
        const def = extractDef(form, content);
        if (def) {
          symbols.push(def);
        }
        continue;
      }

      imports.push(...ns.imports);

      const nsSymbol: FileSymbol = {
        name: ns.namespace,
        kind: SymbolKind.Namespace,
        startLine: form.startPosition.row + 1,
        endLine: form.endPosition.row + 1,
        signature: `(ns ${ns.namespace})`,
      };
      if (ns.docstring) {
        nsSymbol.docstring = ns.docstring;
      }
      nsSymbol.isExported = true;
      symbols.push(nsSymbol);
    }

    if (symbols.length === 0) {
      return null;
    }

    return {
      path: filePath,
      totalLines: content.split("\n").length,
      totalBytes,
      language: "Clojure",
      symbols,
      imports,
      detailLevel: DetailLevel.Full,
    };
  } catch (error) {
    if (signal?.aborted) {
      return null;
    }
    console.error(`Clojure mapper failed: ${error}`);
    return null;
  }
}
|
package/src/mappers/ctags.ts
CHANGED
|
@@ -19,10 +19,12 @@ const execAsync = promisify(exec);
|
|
|
19
19
|
let ctagsAvailable: boolean | null = null;
|
|
20
20
|
|
|
21
21
|
/**
|
|
22
|
-
* Map ctags kind
|
|
22
|
+
* Map ctags kind identifiers to SymbolKind.
|
|
23
|
+
* Handles both single-letter kinds (legacy format) and full-word kinds (JSON format).
|
|
23
24
|
* See: https://docs.ctags.io/en/latest/man/ctags.1.html
|
|
24
25
|
*/
|
|
25
26
|
const CTAGS_KIND_MAP: Record<string, SymbolKind> = {
|
|
27
|
+
// Single-letter kinds (legacy/traditional format)
|
|
26
28
|
c: SymbolKind.Class,
|
|
27
29
|
d: SymbolKind.Constant, // macro definition
|
|
28
30
|
e: SymbolKind.Enum,
|
|
@@ -44,7 +46,7 @@ const CTAGS_KIND_MAP: Record<string, SymbolKind> = {
|
|
|
44
46
|
P: SymbolKind.Property, // property
|
|
45
47
|
S: SymbolKind.Struct, // struct
|
|
46
48
|
T: SymbolKind.Type, // type
|
|
47
|
-
// Language-specific mappings
|
|
49
|
+
// Language-specific single-letter mappings
|
|
48
50
|
a: SymbolKind.Type, // alias
|
|
49
51
|
b: SymbolKind.Variable, // block (Ruby)
|
|
50
52
|
h: SymbolKind.Module, // header (C)
|
|
@@ -53,6 +55,30 @@ const CTAGS_KIND_MAP: Record<string, SymbolKind> = {
|
|
|
53
55
|
u: SymbolKind.Type, // union
|
|
54
56
|
w: SymbolKind.Property, // field
|
|
55
57
|
z: SymbolKind.Property, // parameter
|
|
58
|
+
|
|
59
|
+
// Full-word kinds (JSON output format)
|
|
60
|
+
class: SymbolKind.Class,
|
|
61
|
+
enum: SymbolKind.Enum,
|
|
62
|
+
enumerator: SymbolKind.Enum,
|
|
63
|
+
function: SymbolKind.Function,
|
|
64
|
+
interface: SymbolKind.Interface,
|
|
65
|
+
macro: SymbolKind.Constant,
|
|
66
|
+
member: SymbolKind.Property,
|
|
67
|
+
method: SymbolKind.Method,
|
|
68
|
+
module: SymbolKind.Module,
|
|
69
|
+
namespace: SymbolKind.Namespace,
|
|
70
|
+
package: SymbolKind.Module,
|
|
71
|
+
property: SymbolKind.Property,
|
|
72
|
+
struct: SymbolKind.Struct,
|
|
73
|
+
type: SymbolKind.Type,
|
|
74
|
+
typedef: SymbolKind.Type,
|
|
75
|
+
union: SymbolKind.Type,
|
|
76
|
+
variable: SymbolKind.Variable,
|
|
77
|
+
field: SymbolKind.Property,
|
|
78
|
+
constant: SymbolKind.Constant,
|
|
79
|
+
prototype: SymbolKind.Function,
|
|
80
|
+
alias: SymbolKind.Type,
|
|
81
|
+
trait: SymbolKind.Interface,
|
|
56
82
|
};
|
|
57
83
|
|
|
58
84
|
interface CtagsEntry {
|
|
@@ -149,9 +175,10 @@ export async function ctagsMapper(
|
|
|
149
175
|
return null;
|
|
150
176
|
}
|
|
151
177
|
|
|
152
|
-
// Run ctags with JSON output
|
|
178
|
+
// Run ctags with JSON output and line numbers
|
|
153
179
|
// --output-format=json requires Universal Ctags 5.9+
|
|
154
|
-
|
|
180
|
+
// --fields=+n ensures line numbers are included in JSON output
|
|
181
|
+
const cmd = `ctags --output-format=json --fields=+n -f - "${filePath}" 2>/dev/null`;
|
|
155
182
|
|
|
156
183
|
let stdout: string;
|
|
157
184
|
try {
|