@rarusoft/dendrite-wiki 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -0
- package/dist/api-extractor/extract.js +269 -0
- package/dist/api-extractor/language-extractor.js +15 -0
- package/dist/api-extractor/python-extractor.js +358 -0
- package/dist/api-extractor/render.js +195 -0
- package/dist/api-extractor/tree-sitter-extractor.js +1079 -0
- package/dist/api-extractor/types.js +11 -0
- package/dist/api-extractor/typescript-extractor.js +50 -0
- package/dist/api-extractor/walk.js +178 -0
- package/dist/api-reference.js +438 -0
- package/dist/benchmark-events.js +129 -0
- package/dist/benchmark.js +270 -0
- package/dist/binder-export.js +381 -0
- package/dist/canonical-target.js +168 -0
- package/dist/chart-insert.js +377 -0
- package/dist/chart-prompts.js +414 -0
- package/dist/context-cache.js +98 -0
- package/dist/contradicts-shipped-memory.js +232 -0
- package/dist/diff-context.js +142 -0
- package/dist/doctor.js +220 -0
- package/dist/generated-docs.js +219 -0
- package/dist/i18n.js +71 -0
- package/dist/index.js +49 -0
- package/dist/librarian.js +255 -0
- package/dist/maintenance-actions.js +244 -0
- package/dist/maintenance-inbox.js +842 -0
- package/dist/maintenance-runner.js +62 -0
- package/dist/page-drift.js +225 -0
- package/dist/page-inbox.js +168 -0
- package/dist/report-export.js +339 -0
- package/dist/review-bridge.js +1386 -0
- package/dist/search-index.js +199 -0
- package/dist/store.js +1617 -0
- package/dist/telemetry-defaults.js +44 -0
- package/dist/telemetry-report.js +263 -0
- package/dist/telemetry.js +544 -0
- package/dist/wiki-synthesis.js +901 -0
- package/package.json +35 -0
- package/src/api-extractor/extract.ts +333 -0
- package/src/api-extractor/language-extractor.ts +37 -0
- package/src/api-extractor/python-extractor.ts +380 -0
- package/src/api-extractor/render.ts +267 -0
- package/src/api-extractor/tree-sitter-extractor.ts +1210 -0
- package/src/api-extractor/types.ts +41 -0
- package/src/api-extractor/typescript-extractor.ts +56 -0
- package/src/api-extractor/walk.ts +209 -0
- package/src/api-reference.ts +552 -0
- package/src/benchmark-events.ts +216 -0
- package/src/benchmark.ts +376 -0
- package/src/binder-export.ts +437 -0
- package/src/canonical-target.ts +192 -0
- package/src/chart-insert.ts +478 -0
- package/src/chart-prompts.ts +417 -0
- package/src/context-cache.ts +129 -0
- package/src/contradicts-shipped-memory.ts +311 -0
- package/src/diff-context.ts +187 -0
- package/src/doctor.ts +260 -0
- package/src/generated-docs.ts +316 -0
- package/src/i18n.ts +106 -0
- package/src/index.ts +59 -0
- package/src/librarian.ts +331 -0
- package/src/maintenance-actions.ts +314 -0
- package/src/maintenance-inbox.ts +1132 -0
- package/src/maintenance-runner.ts +85 -0
- package/src/page-drift.ts +292 -0
- package/src/page-inbox.ts +254 -0
- package/src/report-export.ts +392 -0
- package/src/review-bridge.ts +1729 -0
- package/src/search-index.ts +266 -0
- package/src/store.ts +2171 -0
- package/src/telemetry-defaults.ts +50 -0
- package/src/telemetry-report.ts +365 -0
- package/src/telemetry.ts +757 -0
- package/src/wiki-synthesis.ts +1307 -0
|
@@ -0,0 +1,1079 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generic `LanguageExtractor` powered by tree-sitter — the long-tail language layer.
|
|
3
|
+
*
|
|
4
|
+
* Where `typescript-extractor.ts` and `python-extractor.ts` are handcrafted for top-traffic
|
|
5
|
+
* languages with first-class compiler/AST surfaces, this module covers the long tail
|
|
6
|
+
* (Rust today; Go, Java, Ruby, C, C++, PHP next) via tree-sitter's portable WASM grammars
|
|
7
|
+
* and each grammar's upstream `queries/tags.scm` file. Every supported language lives as a
|
|
8
|
+
* single config-table entry — extension, vendored WASM path, vendored tags.scm path, a
|
|
9
|
+
* public-symbol predicate, a doc-comment association rule. Adding another language is a
|
|
10
|
+
* config addition, not a new module.
|
|
11
|
+
*
|
|
12
|
+
* Rationale (Phase B1 of the API reference roadmap): the per-language handcrafted path
|
|
13
|
+
* doesn't scale. GitHub's stack-graphs project — their multi-year attempt at bespoke
|
|
14
|
+
* per-language indexers — was archived in September 2025; even GitHub couldn't sustain it.
|
|
15
|
+
* Tree-sitter `tags.scm` is the durable middle tier the industry settled on. Output
|
|
16
|
+
* quality matches roughly what our handcrafted Python extractor produces (signatures with
|
|
17
|
+
* types-as-written, doc comments as prose), which is the bar for "binder-on-shelf"
|
|
18
|
+
* presentability.
|
|
19
|
+
*
|
|
20
|
+
* Determinism: parse trees change between grammar versions, so each vendored grammar is
|
|
21
|
+
* pinned by upstream tag and sha256 (recorded in `NOTICE` at the repo root).
|
|
22
|
+
* Same `(web-tree-sitter version, grammar tag, tags.scm sha256)` triple = same parse tree
|
|
23
|
+
* across machines. WASM grammars lazy-load on first use so projects that never touch a
|
|
24
|
+
* given language never pay its load cost.
|
|
25
|
+
*/
|
|
26
|
+
import { existsSync } from 'node:fs';
|
|
27
|
+
import { promises as fs } from 'node:fs';
|
|
28
|
+
import path from 'node:path';
|
|
29
|
+
import { fileURLToPath } from 'node:url';
|
|
30
|
+
import { Language, Parser, Query } from 'web-tree-sitter';
|
|
31
|
+
import { walkProjectSources } from './walk.js';
|
|
32
|
+
/**
 * Visibility predicate for Rust definitions.
 *
 * Finds the first named child of type `visibility_modifier` and reports whether its text
 * begins with `pub`. Restricted forms such as `pub(crate)`, `pub(super)` and `pub(in path)`
 * therefore count as public. A definition with no visibility modifier is not public.
 *
 * @param {object} definitionNode - Captured definition node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `true` when the definition carries a `pub`-prefixed visibility.
 */
function rustIsPublic(definitionNode, _source, _name) {
  let index = 0;
  while (index < definitionNode.namedChildCount) {
    const candidate = definitionNode.namedChild(index);
    index += 1;
    if (!candidate || candidate.type !== 'visibility_modifier') {
      continue;
    }
    // Only the first visibility modifier decides the outcome.
    return candidate.text.startsWith('pub');
  }
  return false;
}
|
|
43
|
+
// Config-table entry for Rust (`.rs` sources, detected via `Cargo.toml`).
const RUST_CONFIG = {
  id: 'rust',
  extensions: ['.rs'],
  projectSignals: ['Cargo.toml'],
  vendorSubdir: 'rust',
  walkOptions: {
    include: [
      'src/**/*.rs',
      'examples/**/*.rs',
      'lib.rs',
      'main.rs'
    ],
    exclude: [
      '**/target/**',
      '**/tests/**',
      '**/*_test.rs',
      '**/build.rs',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  // Capture names from Rust's tags.scm and the symbol kind each one is reported as:
  //   @definition.class      (struct / enum / union / type) -> class
  //   @definition.interface  (trait)                        -> interface
  //   @definition.function   (free function)                -> function
  //   @definition.method     (fn inside an impl block)      -> function
  // @definition.module and @definition.macro have no entry here, so they are dropped;
  // macros don't fit the existing kind set yet.
  captureKindMap: {
    'definition.class': 'class',
    'definition.interface': 'interface',
    'definition.function': 'function',
    'definition.method': 'function'
  },
  docComment: {
    // Only outer doc lines (`///`) are attached to a definition. Inner doc (`//!`) is
    // module-level and is surfaced through fileDocComment instead.
    linePrefixes: ['///']
  },
  bodyNodeTypes: new Set([
    'block',
    'field_declaration_list',
    'declaration_list',
    'enum_variant_list',
    'trait_block'
  ]),
  isPublic: rustIsPublic
};
|
|
74
|
+
// --- Go --------------------------------------------------------------------
|
|
75
|
+
/**
 * Visibility predicate for Go: reports whether `name` is an exported identifier.
 *
 * Go's rule is purely lexical: an identifier is exported when its first character is a
 * Unicode uppercase letter (general category Lu). The check is done with a Unicode
 * property escape under the `u` flag so that the first *code point* is examined. Indexing
 * `name[0]` would see only half of a surrogate pair for letters outside the Basic
 * Multilingual Plane, and comparing case-folded forms would accept non-letter characters
 * that merely have a lowercase mapping.
 *
 * @param {object} _definitionNode - Unused; kept for the shared predicate signature.
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} name - Identifier as captured from the source.
 * @returns {boolean} `true` when the identifier starts with an uppercase letter.
 */
function goIsPublic(_definitionNode, _source, name) {
  // A missing or empty name can never be exported.
  return typeof name === 'string' && /^\p{Lu}/u.test(name);
}
|
|
80
|
+
// Config-table entry for Go (`.go` sources, detected via `go.mod`).
const GO_CONFIG = {
  id: 'go',
  extensions: ['.go'],
  projectSignals: ['go.mod'],
  vendorSubdir: 'go',
  walkOptions: {
    include: ['**/*.go'],
    exclude: [
      '**/*_test.go',
      '**/vendor/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.function': 'function',
    'definition.method': 'function',
    // The Go grammar tags every type_spec as `definition.type`, so struct, interface,
    // alias and named-type declarations all arrive under one capture. Reporting each as
    // `class` conveys what readers need: a type defined in this package.
    'definition.type': 'class'
  },
  docComment: {
    // Go doc comments are ordinary `//` lines directly above the declaration; there is
    // no dedicated doc-comment marker.
    linePrefixes: ['//']
  },
  bodyNodeTypes: new Set([
    'block',
    'field_declaration_list',
    'method_spec_list',
    'interface_type',
    'struct_type'
  ]),
  isPublic: goIsPublic
};
|
|
107
|
+
// --- Java ------------------------------------------------------------------
|
|
108
|
+
/**
 * Visibility predicate for Java: requires an explicit `public` modifier.
 *
 * Package-private (no modifier), `protected` and `private` declarations are left out of
 * the generated API reference, which documents the formal `public` surface only.
 *
 * The declaration's `modifiers` child is inspected token by token rather than by
 * searching its whole text. The text of a `modifiers` node also contains annotations, so
 * a text search would treat `@Deprecated(since = "public beta") private void f()` as
 * public because the word appears inside the annotation argument.
 *
 * @param {object} definitionNode - Captured declaration node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `true` when the first `modifiers` child contains a `public` token.
 */
function javaIsPublic(definitionNode, _source, _name) {
  for (let i = 0; i < definitionNode.namedChildCount; i += 1) {
    const modifiers = definitionNode.namedChild(i);
    if (!modifiers || modifiers.type !== 'modifiers') {
      continue;
    }
    // Fallback for node stand-ins that expose no child tokens: keep the text search.
    if (!(modifiers.childCount > 0)) {
      return /\bpublic\b/.test(modifiers.text);
    }
    // Keywords are individual child tokens; an annotation's text is never exactly `public`.
    for (let j = 0; j < modifiers.childCount; j += 1) {
      const token = modifiers.child(j);
      if (token && token.text === 'public') {
        return true;
      }
    }
    return false;
  }
  return false;
}
|
|
121
|
+
// Config-table entry for Java (`.java` sources, detected via Maven or Gradle build files).
const JAVA_CONFIG = {
  id: 'java',
  extensions: ['.java'],
  projectSignals: [
    'pom.xml',
    'build.gradle',
    'build.gradle.kts',
    'settings.gradle',
    'settings.gradle.kts'
  ],
  vendorSubdir: 'java',
  walkOptions: {
    include: ['src/**/*.java', '**/*.java'],
    exclude: [
      '**/test/**',
      '**/tests/**',
      '**/build/**',
      '**/target/**',
      '**/.gradle/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.class': 'class',
    'definition.interface': 'interface',
    'definition.method': 'function'
  },
  docComment: {
    // Javadoc: block comments opened with a slash and two asterisks. There is no
    // line-prefix form. The renderer's block path removes the leading `* ` from each
    // interior line.
    linePrefixes: [],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'class_body',
    'interface_body',
    'block',
    'enum_body',
    'annotation_type_body'
  ]),
  isPublic: javaIsPublic
};
|
|
146
|
+
// --- Ruby ------------------------------------------------------------------
|
|
147
|
+
/**
 * Visibility predicate for Ruby: every captured definition is reported as public.
 *
 * Ruby visibility is section-based: a bare `private` / `protected` call inside a class
 * body affects the definitions that follow it. Honouring that would require walking the
 * enclosing class body, which this first cut does not do. Because Ruby's default is
 * public, and idiomatic code mostly reserves `private` for class internals, the
 * over-inclusion is accepted. Detecting intervening `private` / `protected` calls is a
 * possible follow-up.
 *
 * @param {object} _definitionNode - Unused.
 * @param {string} _source - Unused.
 * @param {string} _name - Unused.
 * @returns {boolean} Always `true`.
 */
function rubyIsPublic(_definitionNode, _source, _name) {
  const everythingIsPublic = true;
  return everythingIsPublic;
}
|
|
157
|
+
// Config-table entry for Ruby (`.rb` sources, detected via `Gemfile` or `Rakefile`).
const RUBY_CONFIG = {
  id: 'ruby',
  extensions: ['.rb'],
  projectSignals: ['Gemfile', 'Rakefile'],
  vendorSubdir: 'ruby',
  walkOptions: {
    include: [
      'lib/**/*.rb',
      'app/**/*.rb',
      '**/*.rb'
    ],
    exclude: [
      '**/spec/**',
      '**/test/**',
      '**/vendor/**',
      '**/node_modules/**',
      '**/tmp/**'
    ],
    respectInternalConvention: false
  },
  // Both classes and modules are reported with the `class` kind.
  captureKindMap: {
    'definition.class': 'class',
    'definition.module': 'class',
    'definition.method': 'function'
  },
  docComment: {
    // Ruby line comments start with `#`. RDoc and YARD attach a contiguous run of such
    // lines to the declaration that follows.
    linePrefixes: ['#']
  },
  bodyNodeTypes: new Set(['body_statement', 'do_block']),
  isPublic: rubyIsPublic
};
|
|
180
|
+
// --- C ---------------------------------------------------------------------
|
|
181
|
+
/**
 * Reports whether `node` has a direct named child of type `storage_class_specifier`
 * whose text contains `static`.
 *
 * @param {object} node - Node to inspect (tree-sitter node interface).
 * @returns {boolean} `true` when such a child exists.
 */
function hasStaticStorageClass(node) {
  for (let i = 0; i < node.namedChildCount; i += 1) {
    const child = node.namedChild(i);
    if (child && child.type === 'storage_class_specifier' && child.text.includes('static')) {
      return true;
    }
  }
  return false;
}
/**
 * Visibility predicate for C: a definition is public unless it is marked `static`.
 *
 * C has no language-level public/private, so the cut is whether the declaration is meant
 * for the linker's external symbol table. The C grammar's tags.scm captures the
 * `function_declarator`, while `storage_class_specifier` lives on the enclosing
 * declaration. For a plain `static int f(void)` that declaration is the direct parent,
 * but for `static int *f(void)` the function declarator is wrapped in a
 * `pointer_declarator` first. The search therefore climbs through any `*_declarator`
 * wrappers and stops at the first ancestor that is not a declarator, so the specifier
 * is found in both shapes.
 *
 * @param {object} definitionNode - Captured definition node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `false` when a `static` storage class applies, otherwise `true`.
 */
function cIsPublic(definitionNode, _source, _name) {
  if (hasStaticStorageClass(definitionNode)) {
    return false;
  }
  let ancestor = definitionNode.parent;
  while (ancestor) {
    if (hasStaticStorageClass(ancestor)) {
      return false;
    }
    // The first non-declarator ancestor is the owning declaration; never look past it,
    // otherwise an unrelated outer `static` could leak in.
    const isDeclaratorWrapper = typeof ancestor.type === 'string' && ancestor.type.endsWith('_declarator');
    if (!isDeclaratorWrapper) {
      break;
    }
    ancestor = ancestor.parent;
  }
  return true;
}
|
|
202
|
+
// Config-table entry for C (`.c` / `.h` sources, detected via common build files).
const C_CONFIG = {
  id: 'c',
  extensions: ['.c', '.h'],
  projectSignals: [
    'Makefile',
    'CMakeLists.txt',
    'meson.build',
    'configure.ac'
  ],
  vendorSubdir: 'c',
  walkOptions: {
    include: ['**/*.h', '**/*.c'],
    exclude: [
      '**/build/**',
      '**/cmake-build-*/**',
      '**/.deps/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    // struct / union
    'definition.class': 'class',
    'definition.function': 'function',
    // typedef / enum
    'definition.type': 'type-alias'
  },
  docComment: {
    // Doxygen convention: both `///` lines and Javadoc-style blocks mark a doc comment
    // in idiomatic C codebases.
    linePrefixes: ['///'],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'compound_statement',
    'field_declaration_list',
    'enumerator_list'
  ]),
  isPublic: cIsPublic
};
|
|
227
|
+
// --- C++ -------------------------------------------------------------------
|
|
228
|
+
/**
 * Visibility predicate for C++.
 *
 * C++ inherits C's static-linkage rule for free-standing functions and adds class-member
 * access specifiers. Tracking `public:` / `private:` / `protected:` sections would mean
 * walking back to the nearest access specifier inside the surrounding class, which this
 * first cut skips. It applies exactly the C filter instead, so class members are
 * over-included and a follow-up can tighten that.
 *
 * The rule is delegated to `cIsPublic` rather than copied, so the C and C++ predicates
 * cannot drift apart while they are meant to be the same rule.
 *
 * @param {object} definitionNode - Captured definition node (tree-sitter node interface).
 * @param {string} _source - Forwarded unchanged.
 * @param {string} _name - Forwarded unchanged.
 * @returns {boolean} Whatever `cIsPublic` reports for the same arguments.
 */
function cppIsPublic(definitionNode, _source, _name) {
  return cIsPublic(definitionNode, _source, _name);
}
|
|
242
|
+
// Config-table entry for C++ (source and header extensions, detected via common build files).
const CPP_CONFIG = {
  id: 'cpp',
  extensions: [
    '.cpp',
    '.cc',
    '.cxx',
    '.hpp',
    '.hh',
    '.hxx',
    '.h'
  ],
  projectSignals: [
    'CMakeLists.txt',
    'Makefile',
    'meson.build',
    'conanfile.txt',
    'conanfile.py'
  ],
  vendorSubdir: 'cpp',
  walkOptions: {
    // Header patterns are listed before implementation-file patterns.
    include: [
      '**/*.hpp',
      '**/*.hh',
      '**/*.hxx',
      '**/*.h',
      '**/*.cpp',
      '**/*.cc',
      '**/*.cxx'
    ],
    exclude: [
      '**/build/**',
      '**/cmake-build-*/**',
      '**/.deps/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.class': 'class',
    'definition.function': 'function',
    'definition.method': 'function',
    'definition.type': 'type-alias'
  },
  // Same doc-comment markers as the C entry: `///` lines plus Javadoc-style blocks.
  docComment: {
    linePrefixes: ['///'],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'compound_statement',
    'field_declaration_list',
    'enumerator_list',
    'namespace_body'
  ]),
  isPublic: cppIsPublic
};
|
|
266
|
+
// --- PHP -------------------------------------------------------------------
|
|
267
|
+
/**
 * Visibility predicate for PHP: public unless a modifier says otherwise.
 *
 * PHP defaults to public visibility, so a declaration is excluded only when one of its
 * `visibility_modifier` / `modifiers` children mentions `private` or `protected`. A
 * declaration with no modifiers, or with an explicit `public`, is included.
 *
 * @param {object} definitionNode - Captured declaration node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `false` when a restricting modifier is present, otherwise `true`.
 */
function phpIsPublic(definitionNode, _source, _name) {
  const restricted = /\b(private|protected)\b/;
  for (let idx = 0; idx < definitionNode.namedChildCount; idx += 1) {
    const node = definitionNode.namedChild(idx);
    if (!node) {
      continue;
    }
    const carriesModifiers = node.type === 'visibility_modifier' || node.type === 'modifiers';
    if (carriesModifiers && restricted.test(node.text)) {
      return false;
    }
  }
  return true;
}
|
|
282
|
+
// Config-table entry for PHP (`.php` sources, detected via `composer.json`).
const PHP_CONFIG = {
  id: 'php',
  extensions: ['.php'],
  projectSignals: ['composer.json'],
  vendorSubdir: 'php',
  walkOptions: {
    include: [
      'src/**/*.php',
      'lib/**/*.php',
      '**/*.php'
    ],
    exclude: [
      '**/vendor/**',
      '**/tests/**',
      '**/Tests/**',
      '**/node_modules/**',
      '**/.phpunit.cache/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.class': 'class',
    // PHP's tags.scm reports both `interface` and `trait` as definition.interface;
    // rendering either as an interface-like surface is reasonable.
    'definition.interface': 'interface',
    'definition.function': 'function'
  },
  docComment: {
    // PHPDoc uses the same block-comment shape as Javadoc; no line-prefix form.
    linePrefixes: [],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'compound_statement',
    'declaration_list',
    'enum_declaration_list'
  ]),
  isPublic: phpIsPublic
};
|
|
308
|
+
// --- C# --------------------------------------------------------------------
|
|
309
|
+
/**
 * Visibility predicate for C#: requires an explicit `public` modifier.
 *
 * C# defaults class members to `private` and top-level types to `internal`. The API
 * reference describes what a caller in another assembly would see, so only declarations
 * with a `modifier` child whose text is exactly `public` qualify. In tree-sitter-c-sharp
 * modifiers appear as `modifier` children directly under the declaration.
 *
 * @param {object} definitionNode - Captured declaration node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `true` when a `public` modifier child exists.
 */
function csharpIsPublic(definitionNode, _source, _name) {
  let position = 0;
  while (position < definitionNode.namedChildCount) {
    const entry = definitionNode.namedChild(position);
    position += 1;
    const isPublicModifier = Boolean(entry) && entry.type === 'modifier' && entry.text === 'public';
    if (isPublicModifier) {
      return true;
    }
  }
  return false;
}
|
|
322
|
+
// Config-table entry for C# (`.cs` sources, detected via .NET SDK/MSBuild files).
const CSHARP_CONFIG = {
  id: 'csharp',
  extensions: ['.cs'],
  projectSignals: [
    'global.json',
    'Directory.Build.props',
    'Directory.Build.targets'
  ],
  vendorSubdir: 'csharp',
  // The upstream release names its WASM with an underscore (`c_sharp`), so the filename
  // is spelled out explicitly here.
  wasmFilename: 'tree-sitter-c_sharp.wasm',
  walkOptions: {
    include: ['**/*.cs'],
    exclude: [
      '**/bin/**',
      '**/obj/**',
      '**/Tests/**',
      '**/*.Tests/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.class': 'class',
    'definition.interface': 'interface',
    'definition.method': 'function'
  },
  docComment: {
    // XML-doc comments are `///` lines; some codebases use Javadoc-style blocks as
    // well, so both are accepted.
    linePrefixes: ['///'],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'declaration_list',
    'block',
    'enum_member_declaration_list'
  ]),
  isPublic: csharpIsPublic
};
|
|
350
|
+
// --- Swift -----------------------------------------------------------------
|
|
351
|
+
/**
 * Visibility predicate for Swift: only `public` and `open` declarations qualify.
 *
 * Swift's default access level is `internal`. The two levels above it, `public` and
 * `open`, are what API docs should show; `private`, `fileprivate` and `internal` are
 * filtered out. In the alex-pinkus grammar, modifiers appear as a `modifiers` list node
 * with children such as `visibility_modifier`. Both shapes are examined one level below
 * the declaration, and the first child that yields a verdict decides.
 *
 * @param {object} definitionNode - Captured declaration node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `true` for `public` / `open`; `false` otherwise, including when no
 *   visibility marker is present.
 */
function swiftIsPublic(definitionNode, _source, _name) {
  // Returns true / false when the node settles the question, undefined when it does not.
  const verdictFor = (node) => {
    if (node.type === 'modifiers') {
      const listText = node.text;
      if (/\b(public|open)\b/.test(listText)) {
        return true;
      }
      return /\b(private|fileprivate|internal)\b/.test(listText) ? false : undefined;
    }
    if (node.type === 'visibility_modifier') {
      const keyword = node.text;
      if (keyword === 'public' || keyword === 'open') {
        return true;
      }
      const isRestricted = keyword === 'private' || keyword === 'fileprivate' || keyword === 'internal';
      return isRestricted ? false : undefined;
    }
    return undefined;
  };
  for (let idx = 0; idx < definitionNode.namedChildCount; idx += 1) {
    const node = definitionNode.namedChild(idx);
    if (!node) {
      continue;
    }
    const verdict = verdictFor(node);
    if (verdict !== undefined) {
      return verdict;
    }
  }
  // No explicit marker: the Swift default (`internal`) is treated as not public.
  return false;
}
|
|
382
|
+
// Config-table entry for Swift (`.swift` sources, detected via SwiftPM / CocoaPods / project.yml).
const SWIFT_CONFIG = {
  id: 'swift',
  extensions: ['.swift'],
  projectSignals: [
    'Package.swift',
    'Podfile',
    'project.yml'
  ],
  vendorSubdir: 'swift',
  walkOptions: {
    include: ['Sources/**/*.swift', '**/*.swift'],
    exclude: [
      '**/Tests/**',
      '**/.build/**',
      '**/Pods/**',
      '**/DerivedData/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.class': 'class',
    // A Swift `protocol` is the closest equivalent to an interface.
    'definition.interface': 'interface',
    'definition.method': 'function',
    'definition.function': 'function',
    'definition.property': 'variable'
  },
  docComment: {
    // Swift documents with `///` outer-doc lines and Javadoc-style blocks.
    linePrefixes: ['///'],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'class_body',
    'protocol_body',
    'function_body'
  ]),
  isPublic: swiftIsPublic
};
|
|
409
|
+
// --- Lua -------------------------------------------------------------------
|
|
410
|
+
/**
 * Visibility predicate for Lua: a definition is public unless it is declared `local`.
 *
 * Lua has no language-level visibility; the convention is the `local` keyword
 * (`local function foo()` / `local foo = function() end`). tree-sitter-lua includes the
 * leading `local` token in the captured node when present, so the primary check is
 * whether the node's own text starts with it. As a safeguard against a grammar revision
 * that moves the keyword outside the node, the source text just before the node (up to
 * 32 characters) is also checked for a trailing `local`.
 *
 * @param {object} definitionNode - Captured node; its `text` and `startIndex` are read.
 * @param {string} source - Full source text of the file being processed.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `false` when either check finds `local`, otherwise `true`.
 */
function luaIsPublic(definitionNode, source, _name) {
  const startsWithLocal = /^\s*local\b/.test(definitionNode.text);
  if (startsWithLocal) {
    return false;
  }
  const nodeStart = definitionNode.startIndex;
  const windowStart = Math.max(0, nodeStart - 32);
  const preceding = source.slice(windowStart, nodeStart);
  // `local` counts only when it sits immediately before the node, separated by whitespace.
  return !/\blocal\s+$/.test(preceding);
}
|
|
428
|
+
// Config-table entry for Lua (`.lua` sources).
const LUA_CONFIG = {
  id: 'lua',
  extensions: ['.lua'],
  // Lua has no canonical project file. The signals used are `init.lua` (the Neovim
  // plugin convention) and the LuaRocks rocks directory.
  projectSignals: ['init.lua', '.luarocks'],
  vendorSubdir: 'lua',
  walkOptions: {
    include: [
      'lua/**/*.lua',
      'src/**/*.lua',
      '**/*.lua'
    ],
    exclude: [
      '**/spec/**',
      '**/test/**',
      '**/.luarocks/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.function': 'function',
    'definition.method': 'function'
  },
  docComment: {
    // Plain Lua comments start with `--`; LDoc marks doc comments with `---`. Both are
    // accepted, listed longest-first so `---` wins on lines matching both.
    linePrefixes: ['---', '--']
  },
  bodyNodeTypes: new Set(['block']),
  isPublic: luaIsPublic
};
|
|
454
|
+
// --- Scala -----------------------------------------------------------------
|
|
455
|
+
/**
 * Visibility predicate for Scala: public unless a modifier says otherwise.
 *
 * Scala members default to public, so a definition is excluded only when one of its
 * `modifiers`, `access_modifier` or `modifier` children mentions `private` or
 * `protected`. All three node types are checked because the grammar may surface
 * modifiers either as a list node or directly as siblings.
 *
 * @param {object} definitionNode - Captured definition node (tree-sitter node interface).
 * @param {string} _source - Unused; kept for the shared predicate signature.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `false` when a restricting modifier is present, otherwise `true`.
 */
function scalaIsPublic(definitionNode, _source, _name) {
  const restricted = /\b(private|protected)\b/;
  const isModifierNode = (node) => node.type === 'modifiers' || node.type === 'access_modifier' || node.type === 'modifier';
  let cursor = 0;
  while (cursor < definitionNode.namedChildCount) {
    const node = definitionNode.namedChild(cursor);
    cursor += 1;
    if (node && isModifierNode(node) && restricted.test(node.text)) {
      return false;
    }
  }
  return true;
}
|
|
471
|
+
// Config-table entry for Scala (`.scala` / `.sc` sources, detected via sbt, Mill or Maven files).
const SCALA_CONFIG = {
  id: 'scala',
  extensions: ['.scala', '.sc'],
  projectSignals: [
    'build.sbt',
    'build.sc',
    'pom.xml'
  ],
  vendorSubdir: 'scala',
  walkOptions: {
    include: ['src/**/*.scala', '**/*.scala'],
    exclude: [
      '**/test/**',
      '**/target/**',
      '**/.bloop/**',
      '**/.metals/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.class': 'class',
    // trait
    'definition.interface': 'interface',
    'definition.enum': 'enum',
    'definition.function': 'function',
    // singleton object; `class` is the closest existing kind
    'definition.object': 'class'
  },
  docComment: {
    // Scaladoc uses the same block-comment shape as Javadoc; no line-prefix form.
    linePrefixes: [],
    blockOpen: '/**',
    blockClose: '*/'
  },
  bodyNodeTypes: new Set([
    'template_body',
    'block',
    'class_parameters'
  ]),
  isPublic: scalaIsPublic
};
|
|
497
|
+
// --- Elixir ----------------------------------------------------------------
|
|
498
|
+
/**
 * Visibility predicate for Elixir: definitions introduced by `defp`, `defmacrop`,
 * `defguardp` or `defnp` are private; everything else is public.
 *
 * The defining keyword is located in two steps:
 *   1. If the captured node itself begins with a known `def*` keyword, that keyword
 *      decides. Reading the whole keyword matters: `defmacrop` and `defguardp` are nine
 *      characters long, so a short fixed-size peek would see only `defmacro` / `defguard`
 *      and misreport them as public.
 *   2. Otherwise the node is assumed to start after its keyword, and the text before it
 *      is searched, limited to the current line and at most 20 characters. Stopping at
 *      the line break keeps a short private definition on the previous line (for example
 *      `defp a, do: 1`) from hiding the public definition that follows it.
 *
 * @param {object} definitionNode - Captured node; only its `startIndex` is read.
 * @param {string} source - Full source text of the file being processed.
 * @param {string} _name - Unused; kept for the shared predicate signature.
 * @returns {boolean} `false` for private definitions, otherwise `true`.
 */
function elixirIsPublic(definitionNode, source, _name) {
  const privateKeywords = ['defp', 'defmacrop', 'defguardp', 'defnp'];
  const publicKeywords = ['def', 'defmacro', 'defguard', 'defn', 'defdelegate', 'defmodule', 'defprotocol', 'defimpl'];
  const start = definitionNode.startIndex;

  // Step 1: keyword at the very start of the node. 16 characters cover the longest
  // keyword plus the character that terminates it.
  const leading = /^def[a-z]*\b/.exec(source.slice(start, start + 16));
  if (leading) {
    if (privateKeywords.includes(leading[0])) {
      return false;
    }
    if (publicKeywords.includes(leading[0])) {
      return true;
    }
    // Some other identifier that merely begins with `def` (e.g. a function called
    // `default`): not a keyword, fall through to the lookback.
  }

  // Step 2: keyword earlier on the same line.
  const before = source.slice(Math.max(0, start - 20), start);
  const sameLine = before.slice(before.lastIndexOf('\n') + 1);
  return !/\b(?:defp|defmacrop|defguardp|defnp)\b/.test(sameLine);
}
|
|
514
|
+
// Config-table entry for Elixir (`.ex` / `.exs` sources, detected via `mix.exs`).
const ELIXIR_CONFIG = {
  id: 'elixir',
  extensions: ['.ex', '.exs'],
  projectSignals: ['mix.exs'],
  vendorSubdir: 'elixir',
  walkOptions: {
    include: ['lib/**/*.ex', '**/*.ex'],
    exclude: [
      '**/test/**',
      '**/_build/**',
      '**/deps/**',
      '**/.elixir_ls/**',
      '**/node_modules/**'
    ],
    respectInternalConvention: false
  },
  captureKindMap: {
    'definition.module': 'class',
    'definition.function': 'function'
  },
  docComment: {
    // Elixir prose normally lives in a `@doc """ ... """` heredoc ahead of the def, but
    // picking that up needs extractor-level support beyond this first cut. Only the
    // simpler convention, `#`-prefixed line comments, is configured here.
    linePrefixes: ['#']
  },
  bodyNodeTypes: new Set(['do_block', 'block']),
  isPublic: elixirIsPublic
};
|
|
538
|
+
// --- OCaml -----------------------------------------------------------------
|
|
539
|
+
/**
 * Visibility predicate for OCaml: every captured definition is reported as public.
 *
 * OCaml expresses visibility through module signatures (`.mli` files): whatever a
 * signature exposes is public. Within a `.ml` file everything is reachable from outside
 * the module unless the project ships a signature that hides it. This first cut does not
 * consult signature files, so nothing is filtered; honouring them is a possible
 * follow-up.
 *
 * @param {object} _definitionNode - Unused.
 * @param {string} _source - Unused.
 * @param {string} _name - Unused.
 * @returns {boolean} Always `true`.
 */
function ocamlIsPublic(_definitionNode, _source, _name) {
  const everythingIsPublic = true;
  return everythingIsPublic;
}
|
|
547
|
+
// Tree-sitter language configuration for OCaml implementation (`.ml`) and
// signature (`.mli`) files.
const OCAML_CONFIG = {
    id: 'ocaml',
    extensions: ['.ml', '.mli'],
    projectSignals: ['dune-project', 'dune', '_oasis'],
    vendorSubdir: 'ocaml',
    walkOptions: {
        include: ['**/*.ml', '**/*.mli'],
        exclude: ['**/_build/**', '**/.merlin', '**/node_modules/**'],
        respectInternalConvention: false
    },
    // Maps `tags.scm` capture names to the symbol kinds rendered on the API page.
    captureKindMap: {
        'definition.module': 'class', // OCaml modules are the closest analogue
        'definition.interface': 'interface',
        'definition.class': 'class',
        'definition.function': 'function',
        'definition.method': 'function'
    },
    docComment: {
        // OCaml's documentation convention is `(** ... *)` block comments, with the `**`
        // prefix distinguishing them from regular `(* ... *)` comments. There is no
        // line-comment form, hence the empty `linePrefixes`.
        linePrefixes: [],
        blockOpen: '(**',
        blockClose: '*)'
    },
    bodyNodeTypes: new Set(['structure', 'signature', 'module_binding']),
    isPublic: ocamlIsPublic
};
|
|
574
|
+
// --- Kotlin ----------------------------------------------------------------
|
|
575
|
+
/**
 * Returns the text of a Kotlin modifier node with any `annotation` children blanked out.
 *
 * Annotation arguments are free-form strings (`@Deprecated("internal use only")`), so
 * they must not take part in visibility-keyword matching.
 *
 * @param {object} modifierNode tree-sitter node of type `modifiers` / `modifier` /
 *   `visibility_modifier`.
 * @returns {string} The node text without its annotation spans.
 */
function kotlinModifierTextWithoutAnnotations(modifierNode) {
    let text = modifierNode.text;
    const partCount = modifierNode.namedChildCount ?? 0;
    for (let i = 0; i < partCount; i += 1) {
        const part = modifierNode.namedChild(i);
        if (part && part.type === 'annotation' && part.text) {
            // Children are visited in document order, so the first remaining occurrence of
            // the annotation's text is the annotation itself.
            text = text.replace(part.text, ' ');
        }
    }
    return text;
}
/**
 * Decides whether a captured Kotlin definition is publicly visible.
 *
 * Kotlin defaults to public visibility; an explicit `private`, `protected`, or `internal`
 * modifier excludes the definition. Modifiers appear as a `modifiers` (or `modifier` /
 * `visibility_modifier`) named child of the definition node. Only the modifier keywords
 * themselves are inspected — annotation text is ignored so that a string argument which
 * happens to contain one of the keywords does not hide a public declaration.
 *
 * @param {object} definitionNode tree-sitter node for the whole definition.
 * @param {string} _source Unused.
 * @param {string} _name Unused.
 * @returns {boolean} `false` when a restricting visibility keyword is present.
 */
function kotlinIsPublic(definitionNode, _source, _name) {
    const restrictedVisibility = /\b(private|protected|internal)\b/;
    for (let i = 0; i < definitionNode.namedChildCount; i += 1) {
        const child = definitionNode.namedChild(i);
        if (!child)
            continue;
        if (child.type === 'modifiers' || child.type === 'modifier' || child.type === 'visibility_modifier') {
            if (restrictedVisibility.test(kotlinModifierTextWithoutAnnotations(child)))
                return false;
        }
    }
    return true;
}
|
|
591
|
+
// Tree-sitter language configuration for Kotlin sources and Kotlin scripts.
const KOTLIN_CONFIG = {
    id: 'kotlin',
    extensions: ['.kt', '.kts'],
    // NOTE(review): `build.gradle` and `pom.xml` are not Kotlin-specific markers —
    // presumably an earlier entry in the language list is expected to claim pure-Java
    // projects first; confirm.
    projectSignals: ['build.gradle.kts', 'settings.gradle.kts', 'build.gradle', 'pom.xml'],
    vendorSubdir: 'kotlin',
    walkOptions: {
        include: ['src/**/*.kt', 'src/**/*.kts', '**/*.kt', '**/*.kts'],
        exclude: ['**/test/**', '**/build/**', '**/.gradle/**', '**/node_modules/**'],
        respectInternalConvention: false
    },
    // Maps `tags.scm` capture names to the symbol kinds rendered on the API page.
    captureKindMap: {
        'definition.class': 'class',
        'definition.function': 'function'
    },
    docComment: {
        // KDoc — same `/** */` shape as Javadoc.
        linePrefixes: [],
        blockOpen: '/**',
        blockClose: '*/'
    },
    bodyNodeTypes: new Set(['class_body', 'function_body', 'enum_class_body', 'block']),
    isPublic: kotlinIsPublic
};
|
|
614
|
+
// --- Bash ------------------------------------------------------------------
|
|
615
|
+
/**
 * Visibility predicate for Bash function definitions.
 *
 * Bash has no language-level visibility: any function defined in a script can be called
 * by anything running in the same shell, so every captured function is surfaced.
 *
 * @returns {boolean} Always `true`.
 */
function bashIsPublic(_definitionNode, _source, _name) {
    return true;
}
|
|
620
|
+
// Tree-sitter language configuration for shell scripts (`.sh` / `.bash`).
const BASH_CONFIG = {
    id: 'bash',
    extensions: ['.sh', '.bash'],
    // Shell scripts have no canonical project-marker file, so we fall back to a content-
    // based claim: the detect-time walker must find at least one .sh / .bash file under
    // the root (see `requireExtensionPresent` below).
    projectSignals: [],
    requireExtensionPresent: true,
    vendorSubdir: 'bash',
    walkOptions: {
        include: ['**/*.sh', '**/*.bash'],
        exclude: ['**/node_modules/**', '**/.git/**'],
        respectInternalConvention: false
    },
    // Functions are the only symbol kind surfaced for Bash.
    captureKindMap: {
        'definition.function': 'function'
    },
    docComment: {
        // Bash only has line comments with `#`.
        linePrefixes: ['#']
    },
    bodyNodeTypes: new Set(['compound_statement']),
    isPublic: bashIsPublic
};
|
|
643
|
+
// Registry of every tree-sitter-backed language configuration.
// Order is significant: `languageForExtension` returns the FIRST config whose
// `extensions` list contains a file's extension, and `treeSitterExtractor.detect` probes
// project signals in this same order.
const LANGUAGES = [
    RUST_CONFIG,
    GO_CONFIG,
    JAVA_CONFIG,
    RUBY_CONFIG,
    C_CONFIG,
    CPP_CONFIG,
    PHP_CONFIG,
    CSHARP_CONFIG,
    SWIFT_CONFIG,
    LUA_CONFIG,
    SCALA_CONFIG,
    ELIXIR_CONFIG,
    OCAML_CONFIG,
    KOTLIN_CONFIG,
    BASH_CONFIG
];
|
|
660
|
+
// Directory containing this module, whether it runs from source or from a build.
const moduleDir = path.dirname(fileURLToPath(import.meta.url));
/**
 * Finds the `vendor/tree-sitter` directory by climbing from this module's directory.
 *
 * The source layout (run directly through `tsx`) and the compiled layout sit at
 * different depths below the project root, so rather than hard-coding a relative path
 * the search checks each ancestor in turn. The climb is capped so it can never wander
 * arbitrarily far up the filesystem.
 *
 * @returns {string|null} Path of the first `vendor/tree-sitter` directory found, or
 *   `null` when none exists within the cap (or the filesystem root is reached).
 */
function resolveVendorRoot() {
    const maxLevels = 8;
    let current = moduleDir;
    let remaining = maxLevels;
    while (remaining > 0) {
        const vendorDir = path.join(current, 'vendor', 'tree-sitter');
        if (existsSync(vendorDir)) {
            return vendorDir;
        }
        const above = path.dirname(current);
        if (above === current) {
            // Reached the filesystem root; nothing further to check.
            return null;
        }
        current = above;
        remaining -= 1;
    }
    return null;
}
|
|
680
|
+
// Memoized result of `Parser.init()`; `null` until the first call below.
let parserInitPromise = null;
/**
 * Starts `Parser.init()` on first use and hands back the same promise on every later
 * call, so the runtime is initialized at most once per process.
 *
 * @returns {Promise<*>} Settles when `Parser.init()` settles.
 */
async function ensureParserInit() {
    parserInitPromise = parserInitPromise ?? Parser.init();
    return parserInitPromise;
}
|
|
687
|
+
// Per-language cache of grammar loads, keyed by `config.id`. Values are promises so
// concurrent callers share a single in-flight load.
const loadedGrammars = new Map();
/**
 * Loads the vendored tree-sitter grammar and its `tags.scm` query for one language.
 *
 * The load is memoized per `config.id`. A load that resolves — including the `null`
 * "grammar not vendored" result — stays cached. A load that REJECTS (for example a
 * failed read or a grammar that fails to load) is evicted, so a later call retries
 * instead of replaying the same failure forever.
 *
 * @param {object} config Language configuration; reads `id`, `vendorSubdir`, and the
 *   optional `wasmFilename` override (defaults to `tree-sitter-<id>.wasm`).
 * @returns {Promise<{config: object, language: object, query: object}|null>} The loaded
 *   grammar bundle, or `null` when the vendor root, the `.wasm` file, or `tags.scm` is
 *   missing.
 */
async function loadGrammar(config) {
    const cached = loadedGrammars.get(config.id);
    if (cached !== undefined) {
        return cached;
    }
    const promise = (async () => {
        const vendorRoot = resolveVendorRoot();
        if (!vendorRoot) {
            return null;
        }
        const wasmFilename = config.wasmFilename ?? `tree-sitter-${config.id}.wasm`;
        const wasmPath = path.join(vendorRoot, config.vendorSubdir, wasmFilename);
        const tagsScmPath = path.join(vendorRoot, config.vendorSubdir, 'tags.scm');
        if (!existsSync(wasmPath) || !existsSync(tagsScmPath)) {
            return null;
        }
        await ensureParserInit();
        const language = await Language.load(wasmPath);
        const queryText = await fs.readFile(tagsScmPath, 'utf8');
        const query = new Query(language, queryText);
        return { config, language, query };
    })();
    loadedGrammars.set(config.id, promise);
    // Evict a failed load so it can be retried. The identity check keeps us from removing
    // a newer entry if the cache was reset and repopulated while this load was in flight.
    // The caller still observes the rejection through the returned promise.
    promise.catch(() => {
        if (loadedGrammars.get(config.id) === promise) {
            loadedGrammars.delete(config.id);
        }
    });
    return promise;
}
|
|
713
|
+
/**
 * Test-only escape hatch: empties the grammar cache so tests can simulate cold loads or
 * replace vendored bundles between runs.
 *
 * Only the `loadedGrammars` map is cleared; nothing else is reset here.
 */
export function resetTreeSitterGrammarCache() {
    loadedGrammars.clear();
}
|
|
718
|
+
/**
 * Picks the language configuration responsible for a file, based on its extension.
 *
 * The extension is compared case-insensitively, and the first entry of `LANGUAGES` that
 * lists it wins.
 *
 * @param {string} filePath Path (or bare filename) of the source file.
 * @returns {object|null} The matching language config, or `null` when none claims it.
 */
function languageForExtension(filePath) {
    const extension = path.extname(filePath).toLowerCase();
    const match = LANGUAGES.find((candidate) => candidate.extensions.includes(extension));
    return match ?? null;
}
|
|
727
|
+
/**
 * Derives a catch-all include list from a language's extensions, for configs that do not
 * supply `walkOptions.include` themselves (e.g. `.rs` becomes `**` + `/*.rs`).
 *
 * @param {object} config Language configuration; only `extensions` is read.
 * @returns {string[]} One recursive glob per extension, in the same order.
 */
function defaultIncludeFor(config) {
    const patterns = [];
    for (const extension of config.extensions) {
        patterns.push(`**/*${extension}`);
    }
    return patterns;
}
|
|
732
|
+
/**
 * Looks up the node of the first capture with the given name.
 *
 * @param {Array<{name: string, node: object}>} captures Captures of one query match.
 * @param {string} name Capture name to look for (without the leading `@`).
 * @returns {object|undefined} The captured node, or `undefined` when no capture matches.
 */
function findCaptureNode(captures, name) {
    for (const capture of captures) {
        if (capture.name === name) {
            return capture.node;
        }
    }
    return undefined;
}
|
|
735
|
+
// Several `@definition.*` captures can apply to one node — Swift's grammar tags a class
// method as both `definition.method` and `definition.function`, and PHP reports both
// `interface` and `trait` as `definition.interface`. To keep the chosen kind stable
// (rather than depending on the order tree-sitter happens to iterate patterns in), the
// captures are ranked here: earlier entries outrank later ones, and any name missing
// from the list ranks below all of them.
const DEFINITION_CAPTURE_PRIORITY = [
    'definition.class',
    'definition.interface',
    'definition.enum',
    'definition.method',
    'definition.function',
    'definition.macro',
    'definition.module',
    'definition.type',
    'definition.field',
    'definition.property',
    'definition.object'
];
/**
 * Ranks a `definition.*` capture name; smaller numbers mean higher priority.
 *
 * @param {string} name Capture name, e.g. `definition.method`.
 * @returns {number} Position in `DEFINITION_CAPTURE_PRIORITY`, or
 *   `Number.MAX_SAFE_INTEGER` for names that are not listed.
 */
function definitionCapturePriority(name) {
    const rank = DEFINITION_CAPTURE_PRIORITY.findIndex((entry) => entry === name);
    return rank >= 0 ? rank : Number.MAX_SAFE_INTEGER;
}
|
|
757
|
+
/**
 * Chooses which `@definition.<kind>` capture of a query match represents the symbol.
 *
 * By `tags.scm` convention the whole definition node is captured as
 * `@definition.<kind>` and the symbol's identifier as `@name`. A single match can carry
 * more than one definition capture; the best-ranked one (per
 * `definitionCapturePriority`) is chosen so the reported kind does not shift between
 * grammar versions. Equal ranks resolve to the capture that appears first.
 *
 * @param {Array<{name: string, node: object}>} captures Captures of one query match.
 * @returns {{capture: object, kindCaptureName: string}|null} The winning capture and its
 *   name, or `null` when the match has no `definition.*` capture.
 */
function findCaptureNodeForDefinition(captures) {
    const ranked = captures
        .filter((capture) => capture.name.startsWith('definition.'))
        .map((capture) => ({ capture, rank: definitionCapturePriority(capture.name) }));
    if (ranked.length === 0) {
        return null;
    }
    // Strict `<` keeps the earliest capture when ranks tie.
    const winner = ranked.reduce((best, entry) => (entry.rank < best.rank ? entry : best));
    return { capture: winner.capture, kindCaptureName: winner.capture.name };
}
|
|
773
|
+
// Comment node types differ by grammar: plain `comment` is the most common, several
// grammars split it into `line_comment` / `block_comment`, and Kotlin's grammar calls
// `/* */` blocks `multiline_comment`. The set is intentionally broad.
const COMMENT_NODE_TYPES = new Set(['line_comment', 'block_comment', 'comment', 'multiline_comment']);
/**
 * Finds the comment node from which doc-comment collection for a definition should
 * start.
 *
 * Doc comments are not always direct previous siblings of the captured node:
 *   - C captures a `function_declarator`, whose previous sibling is the type specifier;
 *     the comment sits before the enclosing `declaration`.
 *   - fwcd's Kotlin grammar folds a trailing `/** *\/` block into the preceding
 *     `package_header`, so the comment is the last named descendant of the previous
 *     sibling rather than a sibling itself.
 * So, starting at the definition and climbing at most four levels, each level's previous
 * named sibling is inspected: if it is a comment it is returned; otherwise its chain of
 * last named children is followed looking for a trailing comment.
 *
 * NOTE(review): the descent does not check adjacency, so a comment that merely ends the
 * previous sibling's body could be picked up — confirm whether that is acceptable.
 *
 * @param {object} definitionNode tree-sitter node captured as the definition.
 * @returns {object|null} The comment node to start from, or `null` if none is found.
 */
function findStartingDocCursor(definitionNode) {
    const maxLevels = 4;
    // Follows the "last named child" chain of `node` until a comment turns up.
    const trailingCommentWithin = (node) => {
        let current = node;
        while (current && current.namedChildCount > 0) {
            const tail = current.namedChild(current.namedChildCount - 1);
            if (!tail) {
                return null;
            }
            if (COMMENT_NODE_TYPES.has(tail.type)) {
                return tail;
            }
            current = tail;
        }
        return null;
    };
    let level = definitionNode;
    let climbed = 0;
    while (level && climbed < maxLevels) {
        const before = level.previousNamedSibling;
        if (before) {
            if (COMMENT_NODE_TYPES.has(before.type)) {
                return before;
            }
            const absorbed = trailingCommentWithin(before);
            if (absorbed) {
                return absorbed;
            }
        }
        level = level.parent;
        climbed += 1;
    }
    return null;
}
|
|
812
|
+
/**
 * Gathers the documentation comment that precedes a definition.
 *
 * Starting from the comment located by `findStartingDocCursor`, the walk moves backward
 * over previous NAMED siblings — named traversal skips the unnamed punctuation/newline
 * tokens that grammars expose inconsistently — and keeps each comment that matches the
 * language's doc convention. The walk stops at the first non-comment node or the first
 * comment that does not match the convention.
 *
 * @param {object} definitionNode tree-sitter node captured as the definition.
 * @param {string} source Full text of the file the node came from.
 * @param {{linePrefixes: string[], blockOpen?: string, blockClose?: string}} rule
 *   Doc-comment convention: line prefixes are tried first, then the block delimiters.
 * @returns {string|null} Comment text with markers stripped, pieces joined by newlines
 *   in source order; `null` when nothing (or only whitespace) was collected.
 */
function collectAdjacentDocComment(definitionNode, source, rule) {
    // Strips doc markers from one raw comment; `null` means "not a doc comment".
    const stripMarkers = (raw) => {
        const linePrefix = rule.linePrefixes.find((prefix) => raw.startsWith(prefix));
        if (linePrefix !== undefined) {
            return raw.slice(linePrefix.length).trimStart();
        }
        const { blockOpen, blockClose } = rule;
        if (!blockOpen || !blockClose) {
            return null;
        }
        if (!raw.startsWith(blockOpen) || !raw.endsWith(blockClose)) {
            return null;
        }
        const interior = raw.slice(blockOpen.length, raw.length - blockClose.length);
        // Drop the decorative leading ` * ` that block comments carry on each line.
        return interior
            .split(/\r?\n/)
            .map((line) => line.replace(/^\s*\*\s?/, ''))
            .join('\n')
            .trim();
    };
    const pieces = [];
    for (let node = findStartingDocCursor(definitionNode); node && COMMENT_NODE_TYPES.has(node.type); node = node.previousNamedSibling) {
        const text = stripMarkers(source.slice(node.startIndex, node.endIndex));
        if (text === null) {
            break;
        }
        // Walking backward, so prepend to keep source order.
        pieces.unshift(text);
    }
    const doc = pieces.join('\n').trim();
    return doc.length > 0 ? doc : null;
}
|
|
853
|
+
/**
 * Produces the signature text of a definition by cutting off its body.
 *
 * The signature is the source from the start of the node up to (but excluding) the first
 * child whose type is listed in `bodyNodeTypes` (block / class_body / etc., per
 * language). Definitions without such a child — type aliases, header-only declarations —
 * keep their full text. This yields compact signatures such as
 * `pub fn translate(key: DendriteI18nKey) -> String` instead of whole function bodies.
 *
 * @param {object} node tree-sitter node of the definition.
 * @param {string} source Full text of the file the node came from.
 * @param {Set<string>} bodyNodeTypes Node types that count as a body for this language.
 * @returns {string} The signature with surrounding whitespace removed.
 */
function buildSignature(node, source, bodyNodeTypes) {
    const bodyChild = findBodyChild(node, bodyNodeTypes);
    const endIndex = bodyChild ? bodyChild.startIndex : node.endIndex;
    // `trim()` already removes every trailing whitespace / line-terminator character, so
    // no extra trailing-whitespace pass is needed.
    return source.slice(node.startIndex, endIndex).trim();
}
|
|
867
|
+
/**
 * Returns the first direct child (named or anonymous) whose type marks it as a body.
 *
 * @param {object} node tree-sitter node whose children are scanned in order.
 * @param {Set<string>} bodyNodeTypes Node types treated as bodies.
 * @returns {object|null} The first matching child, or `null` when there is none.
 */
function findBodyChild(node, bodyNodeTypes) {
    let index = 0;
    while (index < node.childCount) {
        const candidate = node.child(index);
        if (candidate && bodyNodeTypes.has(candidate.type)) {
            return candidate;
        }
        index += 1;
    }
    return null;
}
|
|
876
|
+
/**
 * Turns a project-relative source path into the slug of its API page.
 *
 * Backslashes become forward slashes, a leading `./` is dropped, the final extension is
 * removed, a leading `src/` is stripped, and the result is placed under `api/`.
 *
 * @param {string} relativeSourcePath Source path relative to the project root.
 * @returns {string} Slug of the form `api/<path-without-extension>`.
 */
function deriveModuleSlug(relativeSourcePath) {
    const slugPath = relativeSourcePath
        .replace(/\\/g, '/')
        .replace(/^\.\//, '')
        .replace(/\.[a-z0-9]+$/i, '')
        .replace(/^src\//, '');
    return `api/${slugPath}`;
}
|
|
882
|
+
/**
 * Extracts a Rust file's module-level documentation (`//!` inner doc comments).
 *
 * The file is scanned from the top. Before the first `//!` line, the following prelude
 * is skipped rather than ending the scan:
 *   - blank lines,
 *   - lines starting with `#!` — a shebang (`#!/usr/bin/env ...`) or a crate-level inner
 *     attribute (`#![deny(warnings)]`, `#![cfg_attr(...)]`, ...).
 * Skipping these matters: a `main.rs` that opens with `#![deny(warnings)]` would
 * otherwise lose its module docs entirely. Any other line before the first `//!` ends
 * the scan with no result. Once collection has begun, blank lines are kept as paragraph
 * breaks and the first line that is neither blank nor `//!` ends the block.
 *
 * @param {string} source Full text of the Rust file.
 * @returns {string|null} Doc text with the `//!` markers removed, or `null` if empty.
 */
function extractFileDocCommentRust(source) {
    const docLines = [];
    for (const rawLine of source.split(/\r?\n/)) {
        const text = rawLine.trimStart();
        if (text.startsWith('//!')) {
            docLines.push(text.slice(3).trimStart());
            continue;
        }
        const isBlank = text.length === 0;
        if (docLines.length > 0) {
            // Inside the doc block: blank lines are paragraph breaks, anything else ends it.
            if (!isBlank) {
                break;
            }
            docLines.push('');
            continue;
        }
        // Still in the prelude: tolerate blank lines and `#!...` header lines only.
        if (isBlank || text.startsWith('#!')) {
            continue;
        }
        break;
    }
    const doc = docLines.join('\n').trim();
    return doc.length > 0 ? doc : null;
}
|
|
922
|
+
/**
 * Parses one source file with an already-loaded grammar and extracts its public symbols.
 *
 * Works in two passes. Pass one runs the `tags.scm` query and, per definition node
 * (keyed by its start/end offsets), keeps the highest-priority match that has a usable
 * `@name` capture and a kind this language maps. Pass two filters by the language's
 * `isPublic` predicate and builds the symbol records. The parser and the parsed tree are
 * released before returning — on success and on failure — because they hold memory that
 * is not reclaimed by garbage collection; the returned data contains only plain values.
 *
 * @param {{config: object, language: object, query: object}} loaded Grammar bundle.
 * @param {string} sourcePath Absolute path, or path relative to `rootDir`.
 * @param {string} rootDir Project root used to resolve and relativize `sourcePath`.
 * @returns {Promise<{sourcePath: string, moduleSlug: string, symbols: object[],
 *   fileDocComment: string|null}>} Extraction result, symbols ordered by source line.
 * @throws {Error} When the file cannot be read or tree-sitter returns no tree.
 */
async function extractWithGrammar(loaded, sourcePath, rootDir) {
    const absolute = path.isAbsolute(sourcePath) ? sourcePath : path.resolve(rootDir, sourcePath);
    const relative = path.relative(rootDir, absolute).replace(/\\/g, '/');
    const source = await fs.readFile(absolute, 'utf8');
    const parser = new Parser();
    let tree = null;
    try {
        parser.setLanguage(loaded.language);
        tree = parser.parse(source);
        if (!tree) {
            throw new Error(`tree-sitter failed to parse ${relative}`);
        }
        const candidatesByNode = new Map();
        for (const match of loaded.query.matches(tree.rootNode)) {
            const definition = findCaptureNodeForDefinition(match.captures);
            if (!definition)
                continue;
            const kind = loaded.config.captureKindMap[definition.kindCaptureName];
            if (!kind)
                continue;
            const definitionNode = definition.capture.node;
            const nameNode = findCaptureNode(match.captures, 'name');
            // Skip only THIS match when its name capture is missing or empty. The node is
            // not recorded, so a different match for the same node that does carry a valid
            // name can still claim it.
            if (!nameNode || !nameNode.text)
                continue;
            const priority = definitionCapturePriority(definition.kindCaptureName);
            const dedupeKey = `${definitionNode.startIndex}:${definitionNode.endIndex}`;
            const existing = candidatesByNode.get(dedupeKey);
            if (!existing || priority < existing.priority) {
                candidatesByNode.set(dedupeKey, {
                    node: definitionNode,
                    nameNode,
                    kindCaptureName: definition.kindCaptureName,
                    kind,
                    priority
                });
            }
        }
        const symbols = [];
        for (const candidate of candidatesByNode.values()) {
            const name = candidate.nameNode.text;
            if (!loaded.config.isPublic(candidate.node, source, name))
                continue;
            symbols.push({
                name,
                kind: candidate.kind,
                signature: buildSignature(candidate.node, source, loaded.config.bodyNodeTypes),
                docComment: collectAdjacentDocComment(candidate.node, source, loaded.config.docComment),
                tags: [],
                // tree-sitter rows are 0-based; rendered line numbers are 1-based.
                sourceLine: candidate.node.startPosition.row + 1,
                isDeprecated: false
            });
        }
        symbols.sort((a, b) => a.sourceLine - b.sourceLine);
        // File-level doc comment: language-specific. Rust's `//!` lives at file head.
        const fileDocComment = loaded.config.id === 'rust' ? extractFileDocCommentRust(source) : null;
        return {
            sourcePath: relative,
            moduleSlug: deriveModuleSlug(relative),
            symbols,
            fileDocComment
        };
    }
    finally {
        // All node access is finished by now; free the tree and the parser explicitly.
        if (tree) {
            tree.delete();
        }
        parser.delete();
    }
}
|
|
992
|
+
/**
 * Reports whether a filesystem path is accessible.
 *
 * Any failure of `fs.access` — missing entry, permission problem, or otherwise — is
 * reported as "does not exist" instead of being raised.
 *
 * @param {string} filePath Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, otherwise `false`.
 */
async function exists(filePath) {
    return fs.access(filePath).then(() => true, () => false);
}
|
|
1001
|
+
/**
 * API extractor backed by vendored tree-sitter grammars. It covers every language
 * registered in `LANGUAGES` through three entry points: `detect`, `walk`, and `extract`.
 */
export const treeSitterExtractor = {
    id: 'tree-sitter',
    /**
     * Decides whether this extractor should claim a project.
     *
     * The project is claimed iff (a) some configured language has a project signal in
     * the root (or a content match when `requireExtensionPresent` is set), AND (b) that
     * language's vendored grammar can actually be loaded. Grammar loads are cached, so
     * repeated calls are cheap.
     *
     * @param {string} rootDir Project root directory.
     * @returns {Promise<boolean>} `true` when at least one language qualifies.
     */
    async detect(rootDir) {
        for (const config of LANGUAGES) {
            let signalMatched = false;
            if (config.projectSignals.length > 0) {
                for (const signal of config.projectSignals) {
                    if (await exists(path.join(rootDir, signal))) {
                        signalMatched = true;
                        break;
                    }
                }
            }
            else if (config.requireExtensionPresent) {
                // Content-based detect: `limit: 1` short-circuits on the first hit so we
                // don't pay a full project walk per call. With Bash registered (its only
                // practical use of this flag), every detect on a non-Bash project would
                // otherwise scan the entire tree looking for a single `.sh` file before
                // falling through to other extractors.
                const include = config.walkOptions?.include ?? defaultIncludeFor(config);
                const exclude = config.walkOptions?.exclude;
                const found = await walkProjectSources(rootDir, { include, exclude, respectInternalConvention: false, limit: 1 });
                if (found.length > 0)
                    signalMatched = true;
            }
            else {
                // Languages with neither signals nor `requireExtensionPresent`: never
                // claim. (No language ships in this state today; the branch is a
                // forward-compatibility guard so future configs can't accidentally
                // hijack signal-less projects.)
                continue;
            }
            if (!signalMatched)
                continue;
            const loaded = await loadGrammar(config);
            if (loaded) {
                return true;
            }
        }
        return false;
    },
    /**
     * Lists the source files to extract from.
     *
     * Explicit `options` are honored as-is. Otherwise the per-language defaults of every
     * loadable grammar are walked and unioned, so a mixed-language project is covered in
     * one call.
     *
     * @param {string} rootDir Project root directory.
     * @param {object} [options] Walk options forwarded verbatim to `walkProjectSources`.
     * @returns {Promise<string[]>} Source paths; sorted and de-duplicated when the
     *   per-language defaults are used.
     */
    async walk(rootDir, options) {
        if (options) {
            return walkProjectSources(rootDir, options);
        }
        const collected = [];
        for (const config of LANGUAGES) {
            // Skip languages that can't be loaded — no point walking files we can't parse.
            const loaded = await loadGrammar(config);
            if (!loaded)
                continue;
            const include = config.walkOptions?.include ?? defaultIncludeFor(config);
            const exclude = config.walkOptions?.exclude;
            const respectInternalConvention = config.walkOptions?.respectInternalConvention ?? false;
            const found = await walkProjectSources(rootDir, { include, exclude, respectInternalConvention });
            collected.push(...found);
        }
        // Sort + dedupe in case multiple language patterns capture the same path.
        return Array.from(new Set(collected)).sort();
    },
    /**
     * Extracts the public API of a single source file.
     *
     * @param {string} sourcePath Absolute path, or path relative to `options.rootDir`.
     * @param {{rootDir?: string}} [options] `rootDir` defaults to `process.cwd()`.
     * @returns {Promise<object>} The extraction result produced by `extractWithGrammar`.
     * @throws {Error} When no configured language matches the file's extension, or when
     *   the language's vendored grammar cannot be loaded.
     */
    async extract(sourcePath, options) {
        const rootDir = options?.rootDir ?? process.cwd();
        const config = languageForExtension(sourcePath);
        if (!config) {
            throw new Error(`treeSitterExtractor.extract: no configured language matches extension of ${sourcePath}`);
        }
        const loaded = await loadGrammar(config);
        if (!loaded) {
            // Report the same filename `loadGrammar` looks for, including any
            // `wasmFilename` override, so the message points at the real missing file.
            const wasmFilename = config.wasmFilename ?? `tree-sitter-${config.id}.wasm`;
            throw new Error(`treeSitterExtractor.extract: vendored grammar for ${config.id} is missing — expected vendor/tree-sitter/${config.vendorSubdir}/${wasmFilename}`);
        }
        return extractWithGrammar(loaded, sourcePath, rootDir);
    }
};
|