@xberg-io/tree-sitter-language-pack 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/README.md +155 -0
  2. package/index.d.ts +1025 -0
  3. package/index.js +102 -0
  4. package/package.json +54 -0
package/README.md ADDED
@@ -0,0 +1,155 @@
1
+ <p align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/xberg-io/assets@v1/banner/readme-banner-dark.svg">
4
+ <img alt="Xberg" width="420" src="https://cdn.jsdelivr.net/gh/xberg-io/assets@v1/banner/readme-banner-light.svg">
5
+ </picture>
6
+ </p>
7
+
8
+ # TypeScript / Node.js
9
+
10
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0">
11
+ <a href="https://github.com/xberg-io/alef">
12
+ <img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings" />
13
+ </a>
14
+ <!-- Language Bindings -->
15
+ <a href="https://crates.io/crates/tree-sitter-language-pack">
16
+ <img src="https://img.shields.io/crates/v/tree-sitter-language-pack?label=Rust&color=007ec6" alt="Rust" />
17
+ </a>
18
+ <a href="https://pypi.org/project/tree-sitter-language-pack/">
19
+ <img src="https://img.shields.io/pypi/v/tree-sitter-language-pack?label=Python&color=007ec6" alt="Python" />
20
+ </a>
21
+ <a href="https://www.npmjs.com/package/@xberg-io/tree-sitter-language-pack">
22
+ <img
23
+ src="https://img.shields.io/npm/v/@xberg-io/tree-sitter-language-pack?label=Node.js&color=007ec6"
24
+ alt="Node.js"
25
+ />
26
+ </a>
27
+ <a href="https://www.npmjs.com/package/@xberg-io/tree-sitter-language-pack-wasm">
28
+ <img
29
+ src="https://img.shields.io/npm/v/@xberg-io/tree-sitter-language-pack-wasm?label=WASM&color=007ec6"
30
+ alt="WASM"
31
+ />
32
+ </a>
33
+ <a href="https://central.sonatype.com/artifact/io.xberg.treesitterlanguagepack/tree-sitter-language-pack">
34
+ <img
35
+ src="https://img.shields.io/maven-central/v/io.xberg.treesitterlanguagepack/tree-sitter-language-pack?label=Java&color=007ec6"
36
+ alt="Java"
37
+ />
38
+ </a>
39
+ <a href="https://pkg.go.dev/github.com/xberg-io/tree-sitter-language-pack/packages/go">
40
+ <img
41
+ src="https://img.shields.io/github/v/tag/xberg-io/tree-sitter-language-pack?label=Go&color=007ec6"
42
+ alt="Go"
43
+ />
44
+ </a>
45
+ <a href="https://www.nuget.org/packages/TreeSitterLanguagePack/">
46
+ <img src="https://img.shields.io/nuget/v/TreeSitterLanguagePack?label=C%23&color=007ec6" alt="C#" />
47
+ </a>
48
+ <a href="https://packagist.org/packages/xberg-io/tree-sitter-language-pack">
49
+ <img
50
+ src="https://img.shields.io/packagist/v/xberg-io/tree-sitter-language-pack?label=PHP&color=007ec6"
51
+ alt="PHP"
52
+ />
53
+ </a>
54
+ <a href="https://rubygems.org/gems/tree_sitter_language_pack">
55
+ <img src="https://img.shields.io/gem/v/tree_sitter_language_pack?label=Ruby&color=007ec6" alt="Ruby" />
56
+ </a>
57
+ <a href="https://hex.pm/packages/tree_sitter_language_pack">
58
+ <img src="https://img.shields.io/hexpm/v/tree_sitter_language_pack?label=Elixir&color=007ec6" alt="Elixir" />
59
+ </a>
60
+ <a href="https://pub.dev/packages/tree_sitter_language_pack">
61
+ <img src="https://img.shields.io/pub/v/tree_sitter_language_pack?label=Dart&color=007ec6" alt="Dart" />
62
+ </a>
63
+ <a href="https://central.sonatype.com/artifact/io.xberg.tslp.android/tree-sitter-language-pack-android">
64
+ <img
65
+ src="https://img.shields.io/maven-central/v/io.xberg.tslp.android/tree-sitter-language-pack-android?label=Kotlin&color=007ec6"
66
+ alt="Kotlin"
67
+ />
68
+ </a>
69
+ <a href="https://github.com/xberg-io/tree-sitter-language-pack/tree/main/packages/swift">
70
+ <img src="https://img.shields.io/badge/Swift-SPM-007ec6" alt="Swift" />
71
+ </a>
72
+ <a href="https://github.com/xberg-io/tree-sitter-language-pack/tree/main/packages/zig">
73
+ <img src="https://img.shields.io/badge/Zig-package-007ec6" alt="Zig" />
74
+ </a>
75
+ <a href="https://github.com/xberg-io/tree-sitter-language-pack/releases">
76
+ <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI" />
77
+ </a>
78
+
79
+ <!-- Project Info -->
80
+ <a href="https://github.com/xberg-io/tree-sitter-language-pack/blob/main/LICENSE">
81
+ <img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License" />
82
+ </a>
83
+ <a href="https://docs.tree-sitter-language-pack.xberg.io">
84
+ <img src="https://img.shields.io/badge/Docs-tree--sitter--language--pack-007ec6" alt="Documentation" />
85
+ </a>
86
+ </div>
87
+
88
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 12px; justify-content: center; margin: 28px 0 24px">
89
+ <a href="https://discord.gg/xt9WY3GnKR">
90
+ <img
91
+ height="22"
92
+ src="https://img.shields.io/badge/Discord-Chat-007ec6?logo=discord&logoColor=white"
93
+ alt="Join Discord"
94
+ />
95
+ </a>
96
+ </div>
97
+
98
+ Pre-compiled tree-sitter grammars for 306 programming languages with TypeScript types.
99
+
100
+ ## What This Package Provides
101
+
102
+ - **Parser access** — load a tree-sitter language parser by name without wiring individual grammar crates or packages.
103
+ - **Code intelligence primitives** — parse trees, functions, classes, imports, exports, symbols, docstrings, diagnostics, and syntax-aware chunks.
104
+ - **Shared cache model** — parsers are fetched and cached once, then reused by every call in the process.
105
+ - **Same catalog as every binding** — Rust, Python, Node.js, Go, Java, PHP, Ruby, .NET, Elixir, WASM, Dart, Kotlin Android, Swift, Zig, and C FFI use the same grammar set.
106
+ - **Node-first TypeScript API** — native NAPI package with typed parser and query helpers.
107
+
108
+ ## Installation
109
+
110
+ ```bash
111
+ npm install @xberg-io/tree-sitter-language-pack
112
+ ```
113
+
114
+ ## Quick Start
115
+
116
+ ```typescript
117
+ import { getParser } from "@xberg-io/tree-sitter-language-pack";
118
+
119
+ const parser = getParser("python");
120
+ const tree = parser.parse("def hello(): pass");
121
+ console.log(tree.rootNode.toString());
122
+ ```
123
+
124
+ ## Features
125
+
126
+ - **300+ languages** — pre-compiled tree-sitter grammars covering every major programming language and many minor ones.
127
+ - **On-demand download + cache** — parsers fetched at first use; subsequent runs hit the local cache.
128
+ - **Code intelligence** — extract functions, classes, imports, exports, symbols, docstrings, and diagnostics with one API.
129
+ - **Syntax-aware chunking** — semantic chunks for RAG/LLM pipelines.
130
+ - **Polyglot bindings** — native APIs across 15 languages: Rust, Python, TypeScript/Node.js, Go, Java, C#, Ruby, PHP, Elixir, WebAssembly, Dart, Kotlin, Swift, Zig, and C/C++ via [alef](https://github.com/xberg-io/alef).
131
+
132
+ ## Documentation
133
+
134
+ - **[Documentation](https://docs.tree-sitter-language-pack.xberg.io)** — Full docs and API reference
135
+ - **[GitHub Repository](https://github.com/xberg-io/tree-sitter-language-pack)** — Source, issues, and discussions
136
+
137
+ ## Part of Xberg
138
+
139
+ - [Xberg](https://github.com/xberg-io/xberg) — document intelligence: text, tables, metadata from 91+ formats with optional OCR.
140
+ - [Xberg Enterprise](https://github.com/xberg-io/xberg-enterprise) — managed extraction API with SDKs, dashboards, and observability.
141
+ - [crawlberg](https://github.com/xberg-io/crawlberg) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
142
+ - [html-to-markdown](https://github.com/xberg-io/html-to-markdown) — fast, lossless HTML→Markdown engine.
143
+ - [liter-llm](https://github.com/xberg-io/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
144
+ - [alef](https://github.com/xberg-io/alef) — the polyglot binding generator that produces this README and all per-language bindings.
145
+ - [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
146
+
147
+ ## Contributing
148
+
149
+ Contributions are welcome! See [CONTRIBUTING.md](https://github.com/xberg-io/tree-sitter-language-pack/blob/main/CONTRIBUTING.md) for guidelines.
150
+
151
+ Join our [Discord community](https://discord.gg/xt9WY3GnKR) for questions and discussion.
152
+
153
+ ## License
154
+
155
+ MIT — see [LICENSE](https://github.com/xberg-io/tree-sitter-language-pack/blob/main/LICENSE) for details.
package/index.d.ts ADDED
@@ -0,0 +1,1025 @@
1
+ // This file is auto-generated by alef — DO NOT EDIT.
2
+ // alef:hash:d0b7cd41ab5c9284ea1bdf3bc59373cf38b0a5148eb158533b7694f35060156f
3
+ // To regenerate: alef generate
4
+ // To verify freshness: alef verify --exit-code
5
+ /* eslint-disable */
6
+ import type { Language } from "tree-sitter";
7
+
8
+ export type JsonValue = string | number | boolean | null | JsonValue[] | { [key: string]: JsonValue };
9
+
10
+ /**
11
+ * List all available language names (sorted, deduplicated, includes aliases).
12
+ *
13
+ * Returns names of both statically compiled and dynamically loadable languages,
14
+ * plus any configured aliases.
15
+ */
16
+ export declare function availableLanguages(): Array<string>;
17
+
18
+ /**
19
+ * Return the effective cache directory path.
20
+ *
21
+ * This is either the custom path set via [`configure`] / [`init`] or the
22
+ * default: `~/.cache/tree-sitter-language-pack/v{version}/libs/`.
23
+ * @throws If the system cache directory cannot be determined.
24
+ */
25
+ export declare function cacheDir(): string;
26
+
27
+ /**
28
+ * Delete all cached parser shared libraries.
29
+ *
30
+ * Resets the cache registration so the next call to [`getLanguage`] or
31
+ * a download function will re-register the (now empty) cache directory.
32
+ * @throws If the cache directory cannot be removed.
33
+ */
34
+ export declare function cleanCache(): void;
35
+
36
+ /**
37
+ * Apply download configuration without downloading anything.
38
+ *
39
+ * Use this to set a custom cache directory before the first call to
40
+ * [`getLanguage`] or any download function. Changing the cache dir
41
+ * after languages have been registered has no effect on already-loaded
42
+ * languages.
43
+ * @throws If the lock cannot be acquired.
44
+ */
45
+ export declare function configure(config?: PackConfig | undefined | null): void;
46
+
47
+ /**
48
+ * Detect language name from a file path or extension.
49
+ *
50
+ * This compatibility alias matches the pre-Alef Python binding API.
51
+ */
52
+ export declare function detectLanguage(path: string): string | null;
53
+
54
+ /**
55
+ * Detect language name from file content using the shebang line (`#!`).
56
+ *
57
+ * Inspects only the first line of `content`. If it begins with `#!`, the
58
+ * interpreter name is extracted and mapped to a language name.
59
+ *
60
+ * Handles common patterns:
61
+ * - `#!/usr/bin/env python3` → `"python"`
62
+ * - `#!/bin/bash` → `"bash"`
63
+ * - `#!/usr/bin/env node` → `"javascript"`
64
+ *
65
+ * The `-S` flag accepted by some `env` implementations is skipped automatically.
66
+ * Version suffixes (e.g. `python3.11`, `ruby3.2`) are stripped before matching.
67
+ *
68
+ * Returns `null` when content does not start with `#!`, the shebang is
69
+ * malformed, or the interpreter is not recognised.
70
+ *
71
+ * ```ts
72
+ * import { detectLanguageFromContent } from "@xberg-io/tree-sitter-language-pack";
73
+ * detectLanguageFromContent("#!/usr/bin/env python3\npass"); // "python"
74
+ * detectLanguageFromContent("#!/bin/bash\necho hi"); // "bash"
75
+ * detectLanguageFromContent("no shebang here"); // null
76
+ * ```
77
+ */
78
+ export declare function detectLanguageFromContent(content: string): string | null;
79
+
80
+ /**
81
+ * Detect language name from a file extension (without leading dot).
82
+ *
83
+ * Returns `null` for unrecognized extensions. The match is case-insensitive.
84
+ *
85
+ * ```ts
86
+ * import { detectLanguageFromExtension } from "@xberg-io/tree-sitter-language-pack";
87
+ * detectLanguageFromExtension("py"); // "python"
88
+ * detectLanguageFromExtension("RS"); // "rust"
89
+ * detectLanguageFromExtension("xyz"); // null
90
+ * ```
91
+ */
92
+ export declare function detectLanguageFromExtension(ext: string): string | null;
93
+
94
+ /**
95
+ * Detect language name from a file path.
96
+ *
97
+ * Extracts the file extension and looks it up. Returns `null` if the
98
+ * path has no extension or the extension is not recognized.
99
+ *
100
+ * ```ts
101
+ * import { detectLanguageFromPath } from "@xberg-io/tree-sitter-language-pack";
102
+ * detectLanguageFromPath("src/main.rs"); // "rust"
103
+ * detectLanguageFromPath("README.md"); // "markdown"
104
+ * detectLanguageFromPath("Makefile"); // null
105
+ * ```
106
+ */
107
+ export declare function detectLanguageFromPath(path: string): string | null;
108
+
109
+ /**
110
+ * Download specific languages to the local cache.
111
+ *
112
+ * Returns the number of requested languages available after the call. Already
113
+ * compiled or cached languages are included in the count.
114
+ * @throws If any language is not available in the manifest or if
115
+ * the download fails.
116
+ */
117
+ export declare function download(names: Array<string>): number;
118
+
119
+ /**
120
+ * Download all available languages from the remote manifest.
121
+ *
122
+ * Downloads the platform bundle and extracts every library it contains.
123
+ * Languages that appear in the manifest but are absent from the bundle
124
+ * (e.g. grammars that failed to compile at release time) are silently
125
+ * skipped — they are not treated as an error.
126
+ *
127
+ * Returns the total number of languages now available (statically compiled
128
+ * plus downloaded and cached).
129
+ * @throws If the manifest cannot be fetched or the bundle download fails.
130
+ */
131
+ export declare function downloadAll(): number;
132
+
133
+ /**
134
+ * Return languages that are already downloaded and cached locally.
135
+ *
136
+ * Does not perform any network requests. Returns an empty list if the
137
+ * cache directory does not exist or cannot be read.
138
+ */
139
+ export declare function downloadedLanguages(): Array<string>;
140
+
141
+ /**
142
+ * Download every language in a named group (e.g. `"web"`, `"data"`).
143
+ *
144
+ * Groups are defined in the remote manifest and let you ensure a curated
145
+ * set of related grammars in one call instead of listing each name to
146
+ * [`download()`]. Already-cached languages are skipped.
147
+ *
148
+ * Returns the total number of languages now available (statically compiled
149
+ * plus downloaded and cached).
150
+ * @throws If the manifest cannot be fetched, the group is unknown,
151
+ * or any constituent language fails to download.
152
+ */
153
+ export declare function downloadGroup(name: string): number;
154
+
155
+ /**
156
+ * Get the highlights query for a language, if bundled.
157
+ *
158
+ * Returns the contents of `highlights.scm` as a string, or `null`
159
+ * if no highlights query is bundled for this language.
160
+ */
161
+ export declare function getHighlightsQuery(language: string): string | null;
162
+
163
+ /**
164
+ * Get the injections query for a language, if bundled.
165
+ *
166
+ * Returns the contents of `injections.scm` as a string, or `null`
167
+ * if no injections query is bundled for this language.
168
+ */
169
+ export declare function getInjectionsQuery(language: string): string | null;
170
+
171
+ /**
172
+ * Get a tree-sitter [`Language`] by name using the global registry.
173
+ *
174
+ * Resolves language aliases (e.g., `"shell"` maps to `"bash"`).
175
+ * When the `download` feature is enabled (default), automatically downloads
176
+ * the parser from GitHub releases if not found locally.
177
+ * @throws If the language is not recognized (`Error::LanguageNotFound`),
178
+ * or if auto-download fails (`Error::Download`).
179
+ */
180
+ export declare function getLanguage(name: string): Language;
181
+
182
+ /**
183
+ * Get the locals query for a language, if bundled.
184
+ *
185
+ * Returns the contents of `locals.scm` as a string, or `null`
186
+ * if no locals query is bundled for this language.
187
+ */
188
+ export declare function getLocalsQuery(language: string): string | null;
189
+
190
+ /**
191
+ * Get a [`Parser`] pre-configured for the given language.
192
+ *
193
+ * This is a convenience function that calls [`getLanguage`] and configures
194
+ * a new parser in one step.
195
+ * @throws If the language is not recognized (`Error::LanguageNotFound`), or if
196
+ * the language cannot be applied to the parser (`Error::ParserSetup`).
197
+ */
198
+ export declare function getParser(name: string): Parser;
199
+
200
+ /**
201
+ * Get the tags query for a language, if bundled.
202
+ *
203
+ * Returns the contents of `tags.scm` as a string, or `null`
204
+ * if no tags query is bundled for this language.
205
+ */
206
+ export declare function getTagsQuery(language: string): string | null;
207
+
208
+ /**
209
+ * Check if a language is available by name or alias.
210
+ *
211
+ * Returns `true` if the language can be loaded (statically compiled,
212
+ * dynamically available, or a known alias for one of these).
213
+ */
214
+ export declare function hasLanguage(name: string): boolean;
215
+
216
+ /**
217
+ * Initialize the language pack with the given configuration.
218
+ *
219
+ * Applies any custom cache directory, then downloads all languages and groups
220
+ * specified in the config. This is the recommended entry point when you want
221
+ * to pre-warm the cache before use.
222
+ * @throws If configuration cannot be applied or if downloads fail.
223
+ */
224
+ export declare function init(config?: PackConfig | undefined | null): void;
225
+
226
+ /** A byte range — start (inclusive) to end (exclusive). */
227
+ export interface ByteRange {
228
+ /** Inclusive start byte offset. */
229
+ readonly start: number;
230
+ /** Exclusive end byte offset. */
231
+ readonly end: number;
232
+ }
233
+
234
+ /** Metadata for a single chunk of source code. */
235
+ export interface ChunkContext {
236
+ /** Language name used to parse this chunk. */
237
+ readonly language?: string;
238
+ /** Zero-indexed position of this chunk within the file's chunk list. */
239
+ readonly chunkIndex?: number;
240
+ /** Total number of chunks the file was split into. */
241
+ readonly totalChunks?: number;
242
+ /** Tree-sitter node kinds that appear at the top level of this chunk. */
243
+ readonly nodeTypes?: Array<string>;
244
+ /** Hierarchical path of enclosing structural items (e.g., `["MyClass", "my_method"]`). */
245
+ readonly contextPath?: Array<string>;
246
+ /** Names of symbols defined within this chunk. */
247
+ readonly symbolsDefined?: Array<string>;
248
+ /** Comments contained within this chunk. */
249
+ readonly comments?: Array<CommentInfo>;
250
+ /** Docstrings contained within this chunk. */
251
+ readonly docstrings?: Array<DocstringInfo>;
252
+ /** Whether this chunk contains any tree-sitter error nodes. */
253
+ readonly hasErrorNodes?: boolean;
254
+ }
255
+
256
+ /** A chunk of source code with rich metadata. */
257
+ export interface CodeChunk {
258
+ /** The raw source text of this chunk. */
259
+ readonly content?: string;
260
+ /** Inclusive start byte offset of this chunk in the original source. */
261
+ readonly startByte?: number;
262
+ /** Exclusive end byte offset of this chunk in the original source. */
263
+ readonly endByte?: number;
264
+ /** Zero-indexed start line of this chunk. */
265
+ readonly startLine?: number;
266
+ /** Zero-indexed end line of this chunk. */
267
+ readonly endLine?: number;
268
+ /** Contextual metadata about this chunk. */
269
+ readonly metadata?: ChunkContext;
270
+ }
271
+
272
+ /** A comment extracted from source code. */
273
+ export interface CommentInfo {
274
+ /** The raw text content of the comment. */
275
+ readonly text?: string;
276
+ /** The kind of comment (line, block, or doc). */
277
+ readonly kind?: CommentKind;
278
+ /** Source span covering the comment. */
279
+ readonly span?: Span;
280
+ /** Name of the syntax node this comment is directly associated with. */
281
+ readonly associatedNode?: string;
282
+ }
283
+
284
+ /**
285
+ * The kind of a comment found in source code.
286
+ *
287
+ * Distinguishes between single-line comments, block (multi-line) comments,
288
+ * and documentation comments.
289
+ */
290
+ export declare enum CommentKind {
291
+ /** A single-line comment (e.g., `// ...` or `# ...`). */
292
+ Line = "Line",
293
+ /** A block or multi-line comment using slash-star delimiters. */
294
+ Block = "Block",
295
+ /** A documentation comment such as `/// ...` or slash-double-star block. */
296
+ Doc = "Doc",
297
+ }
298
+
299
+ /**
300
+ * An XML-style attribute attached to a `DataNodeKind.Element` node.
301
+ *
302
+ * Populated only for `DataNodeKind.Element`; always empty for `KeyValue` and
303
+ * `Sequence` nodes.
304
+ */
305
+ export interface DataAttribute {
306
+ /** Attribute name (e.g. `"class"`, `"href"`). */
307
+ readonly name?: string;
308
+ /** Attribute value as a raw string (quotes stripped). */
309
+ readonly value?: string;
310
+ /** Source span covering the entire `name="value"` attribute token. */
311
+ readonly span?: Span;
312
+ }
313
+
314
+ /**
315
+ * A node in the hierarchical data tree produced by data-format extraction.
316
+ *
317
+ * When [`ProcessConfig::data_extraction`](crate::ProcessConfig::data_extraction) is
318
+ * `true`, [`ProcessResult::data`] is populated with a root `DataNode` whose
319
+ * [`children`](DataNode::children) mirror the structure of the parsed file.
320
+ *
321
+ * The `kind` field determines which other fields are meaningful:
322
+ *
323
+ * | `kind` | `key` | `value` | `attributes` | `children` |
324
+ * |------------|--------------------------|---------------|--------------|------------|
325
+ * | `KeyValue` | key / mapping key / index | leaf value | empty | nested map |
326
+ * | `Element` | XML tag name | text content | XML attrs | child elements |
327
+ * | `Sequence` | positional index (`"0"`) | leaf value | empty | sub-items |
328
+ */
329
+ export interface DataNode {
330
+ /** Whether this node is a key/value pair, XML element, or sequence item. */
331
+ readonly kind?: DataNodeKind;
332
+ /**
333
+ * Key, attribute name, tag name, or positional index (`"0"`, `"1"`, …).
334
+ * Omitted at the document root.
335
+ */
336
+ readonly key?: string;
337
+ /**
338
+ * Leaf scalar value, if any. Omitted for containers (objects, arrays, XML elements
339
+ * with child elements).
340
+ */
341
+ readonly value?: string;
342
+ /** Attributes on element-shape nodes (XML `STag` attributes). Empty for all other kinds. */
343
+ readonly attributes?: Array<DataAttribute>;
344
+ /** Children for nested containers and XML element bodies. */
345
+ readonly children?: Array<DataNode>;
346
+ /** Source span covering this node in the original source file. */
347
+ readonly span?: Span;
348
+ }
349
+
350
+ /**
351
+ * The kind of a data node extracted from a data-format file.
352
+ *
353
+ * Classifies each node in the hierarchical [`DataNode`] tree returned when
354
+ * `data_extraction` is enabled on `ProcessConfig`.
355
+ *
356
+ * # Wire format (public JSON contract)
357
+ *
358
+ * Unit variants serialize as a bare string (`"KeyValue"`). DO NOT add
359
+ * `#[serde(tag = "...")]` or rename variants — every language binding has a
360
+ * hand-written deserializer matching this exact shape, and any change breaks
361
+ * all bindings' `process()` tests simultaneously.
362
+ * Covered by `tests/wire_format.rs`.
363
+ */
364
+ export declare enum DataNodeKind {
365
+ /**
366
+ * A key/value pair or mapping (json/toml/properties/yaml/hcl/cue/kdl pair,
367
+ * or a wrapper "object"/"mapping" container).
368
+ */
369
+ KeyValue = "KeyValue",
370
+ /** An XML element with a tag name in `key` and attributes in `attributes`. */
371
+ Element = "Element",
372
+ /**
373
+ * A positional sequence item (JSON array element, YAML block sequence item,
374
+ * CSV/PSV row or cell).
375
+ */
376
+ Sequence = "Sequence",
377
+ }
378
+
379
+ /** A diagnostic (syntax error, missing node, etc.) from parsing. */
380
+ export interface Diagnostic {
381
+ /** Human-readable description of the diagnostic. */
382
+ readonly message?: string;
383
+ /** Severity of the diagnostic. */
384
+ readonly severity?: DiagnosticSeverity;
385
+ /** Source span where the diagnostic was detected. */
386
+ readonly span?: Span;
387
+ }
388
+
389
+ /**
390
+ * Severity level of a diagnostic produced during parsing.
391
+ *
392
+ * Used to classify parse errors, warnings, and informational messages
393
+ * found in the syntax tree.
394
+ */
395
+ export declare enum DiagnosticSeverity {
396
+ /** A parse error (e.g., an `ERROR` or `MISSING` node in the tree). */
397
+ Error = "Error",
398
+ /** A warning-level diagnostic. */
399
+ Warning = "Warning",
400
+ /** An informational diagnostic. */
401
+ Info = "Info",
402
+ }
403
+
404
+ /** A section within a docstring (e.g., Args, Returns, Raises). */
405
+ export interface DocSection {
406
+ /** Section kind (e.g., `"args"`, `"returns"`, `"raises"`). */
407
+ readonly kind?: string;
408
+ /** Parameter or return value name, if applicable. */
409
+ readonly name?: string;
410
+ /** Description text for this section. */
411
+ readonly description?: string;
412
+ }
413
+
414
+ /**
415
+ * The format of a docstring extracted from source code.
416
+ *
417
+ * Identifies the docstring convention used, which varies by language
418
+ * (e.g., Python triple-quoted strings, JSDoc, Rustdoc `///` comments).
419
+ *
420
+ * # Wire format (public JSON contract)
421
+ *
422
+ * Unit variants serialize as a bare string (`"JSDoc"`); the `Other`
423
+ * variant serializes as a single-keyed object (`{"Other": "rst"}`). DO
424
+ * NOT add `#[serde(tag = "...")]`. Covered by `tests/wire_format.rs`.
425
+ */
426
+ export declare enum DocstringFormat {
427
+ /** Python triple-quoted string docstring (`"""..."""`). */
428
+ PythonTripleQuote = "PythonTripleQuote",
429
+ /**
430
+ * JavaScript/TypeScript JSDoc block comment (opens with two stars, closes
431
+ * with star-slash).
432
+ */
433
+ JSDoc = "JSDoc",
434
+ /** Rust `///` or `//!` doc comment. */
435
+ Rustdoc = "Rustdoc",
436
+ /** Go doc comment (a comment block immediately preceding a declaration). */
437
+ GoDoc = "GoDoc",
438
+ /**
439
+ * Java Javadoc block comment (opens with two stars, closes with
440
+ * star-slash).
441
+ */
442
+ JavaDoc = "JavaDoc",
443
+ /** A language-specific docstring format not covered by the standard variants. */
444
+ Other = "Other",
445
+ }
446
+
447
+ /** A docstring extracted from source code. */
448
+ export interface DocstringInfo {
449
+ /** The raw text of the docstring. */
450
+ readonly text?: string;
451
+ /** The docstring format (Python, JSDoc, Rustdoc, etc.). */
452
+ readonly format?: DocstringFormat;
453
+ /** Source span covering the docstring. */
454
+ readonly span?: Span;
455
+ /** Name of the item this docstring documents. */
456
+ readonly associatedItem?: string;
457
+ /** Parsed sections of the docstring (Args, Returns, Raises, etc.). */
458
+ readonly parsedSections?: Array<DocSection>;
459
+ }
460
+
461
+ /** Manages downloading and caching of pre-built parser shared libraries. */
462
+ export declare class DownloadManager {
463
+ /** Create a new download manager for the given version. */
464
+ static new(version: string): DownloadManager;
465
+ /** List languages that are already downloaded and cached. */
466
+ installedLanguages(): Array<string>;
467
+ /**
468
+ * Download the platform bundle and extract every library file it contains.
469
+ *
470
+ * Unlike [`Self::ensure_languages`], this does not check the manifest language list
471
+ * against archive contents — it simply extracts all `.so`/`.dylib`/`.dll` files
472
+ * from the bundle. Languages in the manifest that are missing from the archive
473
+ * are silently ignored rather than returning an error.
474
+ *
475
+ * Returns the number of library files extracted (including those already cached).
476
+ */
477
+ downloadAllBestEffort(): number;
478
+ /**
479
+ * Remove all cached parser libraries.
480
+ *
481
+ * Acquires the cross-process lock so `cleanCache` cannot race a concurrent
482
+ * downloader (avoids Windows sharing-violation errors against an in-flight
483
+ * bundle write). The `.download.lock` file itself is **not** removed — it is
484
+ * permanent infrastructure; deleting it could allow a concurrent process that
485
+ * already opened the file to continue holding a stale lock handle while a new
486
+ * process opens a fresh inode, breaking the mutual-exclusion guarantee.
487
+ */
488
+ cleanCache(): void;
489
+ }
490
+
491
+ /** An export statement extracted from source code. */
492
+ export interface ExportInfo {
493
+ /** The exported name. */
494
+ readonly name?: string;
495
+ /** The kind of export (named, default, or re-export). */
496
+ readonly kind?: ExportKind;
497
+ /** Source span covering the export statement. */
498
+ readonly span?: Span;
499
+ }
500
+
501
+ /**
502
+ * The kind of an export statement found in source code.
503
+ *
504
+ * Covers named exports, default exports, and re-exports from other modules.
505
+ */
506
+ export declare enum ExportKind {
507
+ /** A named export (e.g., `export { foo }`). */
508
+ Named = "Named",
509
+ /** A default export (e.g., `export default foo`). */
510
+ Default = "Default",
511
+ /** A re-export from another module (e.g., `export { foo } from 'bar'`). */
512
+ ReExport = "ReExport",
513
+ }
514
+
515
+ /** Aggregate metrics for a source file. */
516
+ export interface FileMetrics {
517
+ /** Total number of lines (including blank and comment lines). */
518
+ readonly totalLines?: number;
519
+ /** Number of lines containing non-blank, non-comment source code. */
520
+ readonly codeLines?: number;
521
+ /** Number of lines that are entirely comments. */
522
+ readonly commentLines?: number;
523
+ /** Number of blank (whitespace-only) lines. */
524
+ readonly blankLines?: number;
525
+ /** Total byte length of the source file. */
526
+ readonly totalBytes?: number;
527
+ /** Total number of nodes in the syntax tree. */
528
+ readonly nodeCount?: number;
529
+ /** Number of error nodes in the syntax tree (parse errors). */
530
+ readonly errorCount?: number;
531
+ /** Maximum nesting depth reached in the syntax tree. */
532
+ readonly maxDepth?: number;
533
+ }
534
+
535
+ /** An import statement extracted from source code. */
536
+ export interface ImportInfo {
537
+ /** The module or path being imported from. */
538
+ readonly source?: string;
539
+ /** Specific names imported from the source module. */
540
+ readonly items?: Array<string>;
541
+ /** Alias assigned to the import (e.g., `import numpy as np`). */
542
+ readonly alias?: string;
543
+ /** Whether this is a wildcard import (e.g., `import *` or `use foo::*`). */
544
+ readonly isWildcard?: boolean;
545
+ /** Source span covering the import statement. */
546
+ readonly span?: Span;
547
+ }
548
+
549
+ /**
550
+ * Thread-safe registry of tree-sitter language parsers.
551
+ *
552
+ * Manages both statically compiled and dynamically loaded language grammars.
553
+ * Use [`LanguageRegistry.new()`] for the default registry, or access the
554
+ * global instance via the module-level convenience functions
555
+ * (`get_language`, `available_languages`, etc.).
556
+ */
557
+ export declare class LanguageRegistry {
558
+ /**
559
+ * Create a new registry populated with all statically compiled languages.
560
+ *
561
+ * When the `dynamic-loading` feature is enabled, the registry also knows
562
+ * about dynamically loadable grammars and will load them on demand.
563
+ */
564
+ static new(): LanguageRegistry;
565
+ /**
566
+ * Get a tree-sitter [`Language`] by name.
567
+ *
568
+ * Resolves aliases (e.g., `"shell"` -> `"bash"`, `"makefile"` -> `"make"`),
569
+ * then looks up the language in the static table. When the `dynamic-loading`
570
+ * feature is enabled, falls back to loading a shared library on demand.
571
+ * @throws Returns [`Error::LanguageNotFound`] if the name (after alias resolution)
572
+ * does not match any known grammar.
573
+ */
574
+ getLanguage(name: string): Language;
575
+ /**
576
+ * List all available language names, sorted and deduplicated.
577
+ *
578
+ * Includes statically compiled languages, dynamically loadable languages
579
+ * (if the `dynamic-loading` feature is enabled), and all configured aliases.
580
+ */
581
+ availableLanguages(): Array<string>;
582
+ /**
583
+ * Check whether a parser is statically compiled into this build.
584
+ *
585
+ * Returns `true` only when the grammar was compiled in at build time
586
+ * (i.e. it appears in the `STATIC_LANGUAGES` table). This is independent
587
+ * of the extension-to-language mapping: `detect_language_from_extension`
588
+ * consults the static ext table for all 306 grammars regardless of which
589
+ * parsers are compiled in.
590
+ *
591
+ * Use this when you need to distinguish "we know the language name" from
592
+ * "we can actually parse files in that language right now".
593
+ *
594
+ * ```no_run
595
+ * use tree_sitter_language_pack::{detect_language_from_extension, LanguageRegistry};
596
+ *
597
+ * let registry = LanguageRegistry::new();
598
+ * // Extension detection uses the static table — independent of compiled parsers.
599
+ * let lang = detect_language_from_extension("feature"); // always returns Some("gherkin")
600
+ * // Parser availability depends on which grammars were compiled in.
601
+ * let can_parse = lang.map(|name| registry.has_parser(name)).unwrap_or(false);
602
+ * ```
603
+ */
604
+ hasParser(name: string): boolean;
605
+ /**
606
+ * Check whether a language is available by name or alias.
607
+ *
608
+ * Returns `true` if the language can be loaded, either from the static
609
+ * table or from a dynamic library on disk.
610
+ */
611
+ hasLanguage(name: string): boolean;
612
+ /** Return the total number of available languages (including aliases). */
613
+ languageCount(): number;
614
+ /** Parse source code and extract file intelligence based on config in a single pass. */
615
+ process(source: string, config?: ProcessConfig | undefined | null): ProcessResult;
616
+ static default(): LanguageRegistry;
617
+ }
618
+
619
/**
 * One syntax node belonging to a `Tree`.
 *
 * A node keeps a strong reference to its parent tree, so it stays valid no
 * matter how the tree is moved or stored at the FFI boundary.
 */
export declare class Node {
  /** Clone this node, yielding another `Node`. */
  clone(): Node;
  /** Kind name of the node, for example `"function_definition"`. */
  kind(): string;
  /**
   * Numeric kind ID of the node.
   *
   * Every node kind in a grammar gets a stable `u16` ID from tree-sitter
   * (e.g. `"function_definition" → 42`). In tight AST loops, comparing
   * `kindId()` values is cheaper than comparing the strings from `kind()`.
   */
  kindId(): number;
  /** Byte offset where the node starts (inclusive). */
  startByte(): number;
  /** Byte offset where the node ends (exclusive). */
  endByte(): number;
  /**
   * Byte range of the node, as a `ByteRange`.
   *
   * This is a zero-copy text accessor: callers are expected to slice their
   * own source bytes with it.
   */
  byteRange(): ByteRange;
  /** Start `Point` (row, column) of the node. */
  startPosition(): Point;
  /** End `Point` (row, column) of the node. */
  endPosition(): Point;
  /** Whether the node is named, as opposed to punctuation/whitespace. */
  isNamed(): boolean;
  /** Whether the node is an error node. */
  isError(): boolean;
  /** Whether the node stands for a missing token. */
  isMissing(): boolean;
  /** Whether the node is an "extra" node such as a comment. */
  isExtra(): boolean;
  /** Whether the node itself or any of its descendants is an error. */
  hasError(): boolean;
  /** Parent of this node, or `null` when there is none. */
  parent(): Node | null;
  /** Child at position `index`, or `null` when there is none. */
  child(index: number): Node | null;
  /** Number of children, unnamed ones included. */
  childCount(): number;
  /** Named child at position `index`, or `null` when there is none. */
  namedChild(index: number): Node | null;
  /** Number of named children. */
  namedChildCount(): number;
  /** Find a child through the field name the grammar assigns to it. */
  childByFieldName(name: string): Node | null;
  /** S-expression rendering of the subtree rooted at this node. */
  toSexp(): string;
  /** Create a `TreeCursor` positioned at this node. */
  walk(): TreeCursor;
}
679
+
680
/**
 * Settings for the tree-sitter language pack.
 *
 * They determine the cache directory and the set of languages to
 * pre-download. A configuration may come from a TOML file, be built
 * programmatically, or be handed over as a dict/object by a language binding.
 */
export interface PackConfig {
  /**
   * Replacement for the default cache directory.
   *
   * Default: `~/.cache/tree-sitter-language-pack/v{version}/libs/`
   */
  readonly cacheDir?: string;
  /**
   * Languages that should be pre-downloaded on init.
   *
   * Entries are language names such as `"python"` or `"rust"`.
   */
  readonly languages?: Array<string>;
  /** Groups of languages to pre-download, e.g. `"web"`, `"systems"`, `"scripting"`. */
  readonly groups?: Array<string>;
}
703
+
704
/** Tree-sitter parser that is set up for a single language at any given time. */
export declare class Parser {
  /**
   * Create a parser that has no language assigned yet.
   *
   * `setLanguage()` has to be called before anything is parsed.
   */
  static new(): Parser;
  /**
   * Select the parser's language by name, e.g. `"python"`.
   *
   * The language is resolved through the global registry, which downloads
   * it automatically if necessary when the core's `download` feature is
   * enabled.
   * @throws If the language is not recognized (`Error::LanguageNotFound` in
   * the native core) or its ABI is incompatible (`Error::ParserSetup`).
   */
  setLanguage(name: string): void;
  /**
   * Parse source text given as a UTF-8 string.
   *
   * The result is `null` when parsing was cancelled or no language is set.
   */
  parse(source: string): Tree | null;
  /**
   * Parse source text given as raw bytes.
   *
   * The result is `null` when parsing was cancelled or no language is set.
   */
  parseBytes(source: Uint8Array): Tree | null;
  /**
   * Clear the parser's internal state, so that the following `parse()` call
   * is not incremental.
   */
  reset(): void;
  /** Presumably the same as `new()` (mirrors Rust's `Default`) — confirm against the native core. */
  static default(): Parser;
}
738
+
739
/** Position in a source text, given as a zero-indexed row and column. */
export interface Point {
  /** Row number, counted from zero. */
  readonly row: number;
  /** Column number, counted from zero and measured in UTF-16 code units. */
  readonly column: number;
}
746
+
747
/**
 * Options accepted by `process()`.
 *
 * They select the analysis features to run and decide whether the source is
 * split into chunks.
 */
export interface ProcessConfig {
  /**
   * Name of the language. Documented upstream as required, although this
   * declaration marks the property optional.
   */
  readonly language?: string;
  /** Whether structural items (functions, classes, etc.) are extracted. Default: true. */
  readonly structure?: boolean;
  /** Whether import statements are extracted. Default: true. */
  readonly imports?: boolean;
  /** Whether export statements are extracted. Default: true. */
  readonly exports?: boolean;
  /** Whether comments are extracted. Default: false. */
  readonly comments?: boolean;
  /** Whether docstrings are extracted. Default: false. */
  readonly docstrings?: boolean;
  /** Whether symbol definitions are extracted. Default: false. */
  readonly symbols?: boolean;
  /** Whether parse diagnostics are included. Default: false. */
  readonly diagnostics?: boolean;
  /** Upper bound for a chunk, in bytes. Chunking is disabled when unset (`None` in the native core). */
  readonly chunkMaxSize?: number;
  /**
   * Whether a hierarchical key/value data tree is extracted from
   * data-format files. Default: false.
   *
   * With `true`, `ProcessResult.data` receives a `DataNode` tree for the
   * supported languages: JSON, YAML, TOML, `.properties`, HCL/HOCON, INI,
   * editorconfig, KDL, CUE, CSV, PSV, PO, nginx config, Caddy config, XML,
   * and DTD.
   *
   * Any other language leaves that field unset (`None` in the native core).
   */
  readonly dataExtraction?: boolean;
}
783
+
784
/**
 * Full outcome of analysing one source file.
 *
 * It carries metrics, structural analysis, imports/exports, comments,
 * docstrings, symbols, diagnostics, and optionally chunked code segments.
 * Which fields are filled in follows from the `ProcessConfig` flags.
 *
 * # Fields
 *
 * - `language` - language that was used for parsing
 * - `metrics` - always computed: line counts, byte sizes, error counts
 * - `structure` - functions, classes, structs (with `config.structure = true`)
 * - `imports` - import statements (with `config.imports = true`)
 * - `exports` - export statements (with `config.exports = true`)
 * - `comments` - comments (with `config.comments = true`)
 * - `docstrings` - docstrings (with `config.docstrings = true`)
 * - `symbols` - symbol definitions (with `config.symbols = true`)
 * - `diagnostics` - parse errors (with `config.diagnostics = true`)
 * - `chunks` - chunked code segments (with `config.chunkMaxSize` set)
 */
export interface ProcessResult {
  /** Name of the language the source file was parsed as. */
  readonly language?: string;
  /** Metrics for the whole file (line counts, byte size, error count). */
  readonly metrics?: FileMetrics;
  /** Structural items found at the top level (functions, classes, etc.). */
  readonly structure?: Array<StructureItem>;
  /** Import statements found in the source. */
  readonly imports?: Array<ImportInfo>;
  /** Export statements found in the source. */
  readonly exports?: Array<ExportInfo>;
  /** Comments found in the source. */
  readonly comments?: Array<CommentInfo>;
  /** Docstrings found in the source. */
  readonly docstrings?: Array<DocstringInfo>;
  /** Symbol definitions (variables, types, functions) found in the source. */
  readonly symbols?: Array<SymbolInfo>;
  /** Tree-sitter parse diagnostics (syntax errors, missing nodes). */
  readonly diagnostics?: Array<Diagnostic>;
  /** Syntax-aware chunks of code, produced when chunking is enabled. */
  readonly chunks?: Array<CodeChunk>;
  /**
   * Hierarchical data tree, extracted when `config.dataExtraction` is `true`.
   *
   * It is filled in for the supported data-format languages (JSON, YAML,
   * TOML, properties, HCL, INI, XML, CSV, and more). It stays unset when
   * `dataExtraction` is `false` (the default) or when the language is not a
   * recognised data format.
   *
   * `DataNode` describes the shape of the returned tree.
   */
  readonly data?: DataNode;
}
836
+
837
/**
 * Range within source code, expressed in bytes and in lines/columns.
 *
 * The byte offsets serve slicing; the line/column positions are the
 * human-readable form for display and diagnostics.
 */
export interface Span {
  /** Byte offset at which the span starts (inclusive). */
  readonly startByte?: number;
  /** Byte offset at which the span ends (exclusive). */
  readonly endByte?: number;
  /** Line on which the span starts, counted from zero. */
  readonly startLine?: number;
  /** Column at which the span starts, counted from zero. */
  readonly startColumn?: number;
  /** Line on which the span ends, counted from zero. */
  readonly endLine?: number;
  /** Column at which the span ends, counted from zero. */
  readonly endColumn?: number;
}
857
+
858
/** One structural element of the source code, such as a function, class or struct. */
export interface StructureItem {
  /** Which kind of structural item this is. */
  readonly kind?: StructureKind;
  /** Name the item was declared with, when it has one. */
  readonly name?: string;
  /** Visibility modifier, for example `"pub"`, `"public"` or `"private"`. */
  readonly visibility?: string;
  /** Span of source text occupied by the whole declaration. */
  readonly span?: Span;
  /** Structural items nested inside this one, e.g. the methods of a class. */
  readonly children?: Array<StructureItem>;
  /** Names of the decorators or attributes applied to the item. */
  readonly decorators?: Array<string>;
  /** Documentation comment attached to the item, when there is one. */
  readonly docComment?: string;
  /** Complete signature text, e.g. function parameters and return type. */
  readonly signature?: string;
  /** Span of just the item's body, when that differs from the declaration. */
  readonly bodySpan?: Span;
}
879
+
880
/**
 * Classification of a structural item found in source code.
 *
 * It covers top-level and nested declarations such as functions, classes,
 * structs, enums, traits, and more. `Other` is meant for language-specific
 * constructs that fit none of the standard categories.
 *
 * # Wire format (public JSON contract)
 *
 * In the native core, unit variants are serialized as a bare string
 * (`"Function"`), whereas the `Other` variant is serialized as a
 * single-keyed object (`{"Other": "macro"}`). Upstream warns: DO NOT add
 * `#[serde(tag = "...")]` or rename variants — each language binding ships a
 * hand-written deserializer for exactly this shape, so any change breaks the
 * `process()` tests of all bindings at once. Covered by
 * `tests/wire_format.rs`.
 */
export declare enum StructureKind {
  /** Function, either free-standing or associated. */
  Function = "Function",
  /** Method defined within a class, struct, trait, or impl block. */
  Method = "Method",
  /** Class definition. */
  Class = "Class",
  /** Struct definition. */
  Struct = "Struct",
  /** Interface or protocol definition. */
  Interface = "Interface",
  /** Enum definition. */
  Enum = "Enum",
  /** Module or package declaration. */
  Module = "Module",
  /** Trait definition. */
  Trait = "Trait",
  /** Impl block (Rust) or a comparable implementation block. */
  Impl = "Impl",
  /** Namespace declaration. */
  Namespace = "Namespace",
  /** Language-specific construct outside every standard category. */
  Other = "Other",
}
920
+
921
/** One symbol (variable, function, type, etc.) found in the source code. */
export interface SymbolInfo {
  /** Name of the symbol. */
  readonly name?: string;
  /** Kind of the symbol: variable, function, class, etc. */
  readonly kind?: SymbolKind;
  /** Span of source text occupied by the symbol's definition. */
  readonly span?: Span;
  /** Type annotation written explicitly in the source, when there is one. */
  readonly typeAnnotation?: string;
  /** Documentation comment that belongs to the symbol. */
  readonly doc?: string;
}
934
+
935
/**
 * Classification of a symbol definition found in source code.
 *
 * It covers definitions such as variables, constants, functions, classes,
 * types, interfaces, enums, and modules.
 *
 * # Wire format (public JSON contract)
 *
 * In the native core, unit variants are serialized as a bare string
 * (`"Function"`), whereas the `Other` variant is serialized as a
 * single-keyed object (`{"Other": "macro"}`). Upstream warns: DO NOT add
 * `#[serde(tag = "...")]`. Covered by `tests/wire_format.rs`.
 */
export declare enum SymbolKind {
  /** Variable binding. */
  Variable = "Variable",
  /** Constant, i.e. an immutable binding. */
  Constant = "Constant",
  /** Function definition. */
  Function = "Function",
  /** Class definition. */
  Class = "Class",
  /** Type alias or typedef. */
  Type = "Type",
  /** Interface definition. */
  Interface = "Interface",
  /** Enum definition. */
  Enum = "Enum",
  /** Module declaration. */
  Module = "Module",
  /** Symbol kind that none of the standard variants covers. */
  Other = "Other",
}
967
+
968
/** Syntax tree produced by parsing. Cloning is cheap (a refcount bump). */
export declare class Tree {
  /** Root `Node` of the tree. */
  rootNode(): Node;
  /** Create a `TreeCursor` positioned at the root. */
  walk(): TreeCursor;
}
975
+
976
/** Cursor used to traverse a `Tree`. */
export declare class TreeCursor {
  /** `Node` at which the cursor currently stands. */
  node(): Node;
  /**
   * Step down to the first child of the current node.
   * The result is `true` if such a child existed.
   */
  gotoFirstChild(): boolean;
  /**
   * Step up to the parent of the current node.
   * The result is `true` if such a parent existed.
   */
  gotoParent(): boolean;
  /**
   * Step sideways to the next sibling of the current node.
   * The result is `true` if such a sibling existed.
   */
  gotoNextSibling(): boolean;
  /** Field name of the current node, or `null` when it has none. */
  fieldName(): string | null;
}
998
+
999
/**
 * Number of languages that are available.
 *
 * The count takes in statically compiled languages, dynamically loadable
 * languages, and aliases.
 */
export declare function languageCount(): number;
1006
+
1007
/**
 * List every language name offered by the remote manifest (306).
 *
 * The remote manifest is fetched (and cached) in order to discover the full
 * set of downloadable languages. What is already cached locally is listed by
 * the core's `downloaded_languages` function instead (presumably exposed
 * here as `downloadedLanguages` — confirm).
 * @throws If the manifest cannot be fetched.
 */
export declare function manifestLanguages(): Array<string>;
1016
+
1017
/**
 * Analyse source code through the global registry and extract file
 * intelligence from it.
 *
 * The source is parsed with tree-sitter; metrics, structure, imports,
 * exports, comments, docstrings, symbols, diagnostics, and/or chunks are
 * then extracted according to the flags set in `ProcessConfig`.
 * @throws If the language is not found or parsing fails.
 */
export declare function process(source: string, config?: ProcessConfig | undefined | null): ProcessResult;
package/index.js ADDED
@@ -0,0 +1,102 @@
1
+ "use strict";
2
+
3
// Host operating system and CPU architecture, as reported by Node.js. They
// select the matching row of the native-binding target table and appear in
// the error raised when no binding can be loaded. (The former `isWindows`
// constant was never read anywhere in this file and has been dropped.)
const { platform, arch } = process;
5
/**
 * Detect whether the current process runs against musl libc (e.g. Alpine).
 *
 * Detection order:
 *   1. The Node.js diagnostic report. Its header exposes the glibc version as
 *      `glibcVersionRuntime`; a string there means glibc, so the answer is
 *      `false`. The previously checked `glibcVersion` key is not a report
 *      field, which is why it always looked undefined; it is still accepted
 *      in case a runtime does populate it. If the report lists a loaded
 *      shared object that looks like musl, the answer is `true`.
 *   2. A filesystem probe for the musl dynamic loader. The loader is named
 *      after the CPU architecture (`ld-musl-x86_64.so.1`,
 *      `ld-musl-aarch64.so.1`) and normally lives in `/lib`; `/lib64` is
 *      probed as well to keep the location the earlier code checked.
 *
 * Any failure while probing is treated as "not musl".
 *
 * @returns {boolean} `true` when musl is detected, otherwise `false`.
 */
const isMusl = () => {
  if (
    typeof process.report === "object" &&
    process.report !== null &&
    typeof process.report.getReport === "function"
  ) {
    let report = null;
    try {
      report = process.report.getReport();
    } catch {
      // Report generation is best-effort; fall back to the filesystem probe.
      report = null;
    }

    const header = report && report.header;
    if (
      header &&
      (typeof header.glibcVersionRuntime === "string" ||
        typeof header.glibcVersion === "string")
    ) {
      return false;
    }

    if (report && Array.isArray(report.sharedObjects)) {
      const looksLikeMusl = (file) =>
        typeof file === "string" &&
        (file.includes("libc.musl-") || file.includes("ld-musl-"));
      if (report.sharedObjects.some(looksLikeMusl)) {
        return true;
      }
    }
  }

  // Map Node's architecture names onto the names used in the loader file.
  const loaderArchNames = { x64: "x86_64", arm64: "aarch64" };
  const loaderArch = loaderArchNames[process.arch] || process.arch;
  const loaderPaths = [
    `/lib/ld-musl-${loaderArch}.so.1`,
    `/lib64/ld-musl-${loaderArch}.so.1`,
  ];

  try {
    const fs = require("fs");
    return loaderPaths.some((loaderPath) => fs.existsSync(loaderPath));
  } catch {
    return false;
  }
};
33
+
34
// Loader state shared by the functions below: the native addon once it has
// been resolved (otherwise `null`), and the messages of all failed attempts.
let nativeBinding = null;
const loadErrors = [];

/**
 * Try to `require` one of the optional platform packages.
 *
 * A failure is not propagated: its message is appended to `loadErrors`
 * (prefixed with the package name) and `null` is returned.
 *
 * @param {string} name - Package name handed to `require`.
 * @returns {*} Whatever `require(name)` yields, or `null` if it threw.
 */
function requireOptionalDependency(name) {
  let loaded;
  try {
    loaded = require(name);
  } catch (err) {
    loadErrors.push(`Optional dependency ${name}: ${err.message}`);
    loaded = null;
  }
  return loaded;
}
45
+
46
/**
 * Resolve the native addon for the host and store it in `nativeBinding`.
 *
 * Each row of the target table names an OS, a CPU architecture, an optional
 * libc flavour, a local `.node` file (named after `napi.binaryName`) and an
 * optional-dependency package (named after `napi.packageName`, which carries
 * the scope prefix of the parent package). For every row that matches the
 * host, the local file is tried first and the package second; the first
 * truthy result ends the search. Messages of failed attempts are collected
 * in `loadErrors`.
 */
const tryLoadBinding = () => {
  const candidates = [
    { os: "linux", cpu: "x64", libc: "gnu", file: "./ts-pack-core-node.linux-x64-gnu.node", pkg: "@xberg-io/tree-sitter-language-pack-linux-x64-gnu" },
    { os: "linux", cpu: "arm64", libc: "gnu", file: "./ts-pack-core-node.linux-arm64-gnu.node", pkg: "@xberg-io/tree-sitter-language-pack-linux-arm64-gnu" },
    { os: "linux", cpu: "x64", libc: "musl", file: "./ts-pack-core-node.linux-x64-musl.node", pkg: "@xberg-io/tree-sitter-language-pack-linux-x64-musl" },
    { os: "linux", cpu: "arm64", libc: "musl", file: "./ts-pack-core-node.linux-arm64-musl.node", pkg: "@xberg-io/tree-sitter-language-pack-linux-arm64-musl" },
    { os: "darwin", cpu: "x64", libc: null, file: "./ts-pack-core-node.darwin-x64.node", pkg: "@xberg-io/tree-sitter-language-pack-darwin-x64" },
    { os: "darwin", cpu: "arm64", libc: null, file: "./ts-pack-core-node.darwin-arm64.node", pkg: "@xberg-io/tree-sitter-language-pack-darwin-arm64" },
    { os: "win32", cpu: "x64", libc: null, file: "./ts-pack-core-node.win32-x64-msvc.node", pkg: "@xberg-io/tree-sitter-language-pack-win32-x64-msvc" },
    { os: "win32", cpu: "arm64", libc: null, file: "./ts-pack-core-node.win32-arm64-msvc.node", pkg: "@xberg-io/tree-sitter-language-pack-win32-arm64-msvc" },
  ];

  for (const { os, cpu, libc, file, pkg } of candidates) {
    const hostMatches = platform === os && arch === cpu;
    if (!hostMatches) {
      continue;
    }

    // Linux rows additionally have to agree with the detected libc flavour.
    if (os === "linux" && libc) {
      const wantsMusl = libc === "musl";
      if (wantsMusl !== isMusl()) {
        continue;
      }
    }

    // First choice: a `.node` file sitting next to this loader.
    try {
      nativeBinding = require(file);
      if (nativeBinding) {
        return;
      }
    } catch (err) {
      loadErrors.push(err.message);
    }

    // Second choice: the platform-specific optional dependency.
    try {
      const fromPackage = requireOptionalDependency(pkg);
      if (fromPackage) {
        nativeBinding = fromPackage;
        return;
      }
    } catch (err) {
      loadErrors.push(err.message);
    }
  }
};
93
+
94
// Resolve the binding once, at module load time.
tryLoadBinding();

if (nativeBinding) {
  // The addon itself is the module's public surface.
  module.exports = nativeBinding;
} else {
  // No target row produced a usable addon: report every recorded failure.
  throw new Error(
    `Failed to load native binding for ${platform}-${arch}. Errors: ${loadErrors.join(", ")}`
  );
}
package/package.json ADDED
@@ -0,0 +1,54 @@
1
+ {
2
+ "name": "@xberg-io/tree-sitter-language-pack",
3
+ "version": "0.0.1",
4
+ "description": "Pre-compiled tree-sitter grammars for 306 programming languages",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/xberg-io/tree-sitter-language-pack.git"
9
+ },
10
+ "files": [
11
+ "index.js",
12
+ "index.d.ts",
13
+ "*.node"
14
+ ],
15
+ "main": "index.js",
16
+ "types": "index.d.ts",
17
+ "exports": {
18
+ ".": {
19
+ "types": "./index.d.ts",
20
+ "require": "./index.js",
21
+ "default": "./index.js"
22
+ }
23
+ },
24
+ "publishConfig": {
25
+ "access": "public"
26
+ },
27
+ "optionalDependencies": {
28
+ "@xberg-io/tree-sitter-language-pack-darwin-arm64": "0.0.1",
29
+ "@xberg-io/tree-sitter-language-pack-darwin-x64": "0.0.1",
30
+ "@xberg-io/tree-sitter-language-pack-linux-arm64-gnu": "0.0.1",
31
+ "@xberg-io/tree-sitter-language-pack-linux-arm64-musl": "0.0.1",
32
+ "@xberg-io/tree-sitter-language-pack-linux-x64-gnu": "0.0.1",
33
+ "@xberg-io/tree-sitter-language-pack-linux-x64-musl": "0.0.1",
34
+ "@xberg-io/tree-sitter-language-pack-win32-arm64-msvc": "0.0.1",
35
+ "@xberg-io/tree-sitter-language-pack-win32-x64-msvc": "0.0.1"
36
+ },
37
+ "napi": {
38
+ "binaryName": "ts-pack-core-node",
39
+ "packageName": "@xberg-io/tree-sitter-language-pack",
40
+ "targets": [
41
+ "x86_64-unknown-linux-gnu",
42
+ "aarch64-unknown-linux-gnu",
43
+ "x86_64-unknown-linux-musl",
44
+ "aarch64-unknown-linux-musl",
45
+ "x86_64-apple-darwin",
46
+ "aarch64-apple-darwin",
47
+ "x86_64-pc-windows-msvc",
48
+ "aarch64-pc-windows-msvc"
49
+ ]
50
+ },
51
+ "engines": {
52
+ "node": ">= 18"
53
+ }
54
+ }