@mrclrchtr/supi-tree-sitter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +97 -0
  2. package/package.json +67 -0
  3. package/resources/.gitkeep +0 -0
  4. package/resources/grammars/bash/tree-sitter-bash.wasm +0 -0
  5. package/resources/grammars/bash/tree-sitter-bash.wasm.json +7 -0
  6. package/resources/grammars/c/tree-sitter-c.wasm +0 -0
  7. package/resources/grammars/c/tree-sitter-c.wasm.json +7 -0
  8. package/resources/grammars/cpp/tree-sitter-cpp.wasm +0 -0
  9. package/resources/grammars/cpp/tree-sitter-cpp.wasm.json +7 -0
  10. package/resources/grammars/go/tree-sitter-go.wasm +0 -0
  11. package/resources/grammars/go/tree-sitter-go.wasm.json +7 -0
  12. package/resources/grammars/html/tree-sitter-html.wasm +0 -0
  13. package/resources/grammars/html/tree-sitter-html.wasm.json +7 -0
  14. package/resources/grammars/java/tree-sitter-java.wasm +0 -0
  15. package/resources/grammars/java/tree-sitter-java.wasm.json +7 -0
  16. package/resources/grammars/javascript/tree-sitter-javascript.wasm +0 -0
  17. package/resources/grammars/javascript/tree-sitter-javascript.wasm.json +7 -0
  18. package/resources/grammars/kotlin/tree-sitter-kotlin.wasm +0 -0
  19. package/resources/grammars/kotlin/tree-sitter-kotlin.wasm.json +12 -0
  20. package/resources/grammars/python/tree-sitter-python.wasm +0 -0
  21. package/resources/grammars/python/tree-sitter-python.wasm.json +7 -0
  22. package/resources/grammars/r/tree-sitter-r.wasm +0 -0
  23. package/resources/grammars/r/tree-sitter-r.wasm.json +7 -0
  24. package/resources/grammars/ruby/tree-sitter-ruby.wasm +0 -0
  25. package/resources/grammars/ruby/tree-sitter-ruby.wasm.json +7 -0
  26. package/resources/grammars/rust/tree-sitter-rust.wasm +0 -0
  27. package/resources/grammars/rust/tree-sitter-rust.wasm.json +7 -0
  28. package/resources/grammars/sql/tree-sitter-sql.wasm +0 -0
  29. package/resources/grammars/sql/tree-sitter-sql.wasm.json +19 -0
  30. package/resources/grammars/tsx/tree-sitter-tsx.wasm +0 -0
  31. package/resources/grammars/tsx/tree-sitter-tsx.wasm.json +7 -0
  32. package/resources/grammars/typescript/tree-sitter-typescript.wasm +0 -0
  33. package/resources/grammars/typescript/tree-sitter-typescript.wasm.json +7 -0
  34. package/scripts/generate-kotlin-wasm.mjs +126 -0
  35. package/scripts/generate-sql-wasm.mjs +144 -0
  36. package/scripts/vendor-wasm.mjs +151 -0
  37. package/src/callees.ts +343 -0
  38. package/src/coordinates.ts +108 -0
  39. package/src/exports.ts +315 -0
  40. package/src/formatting.ts +104 -0
  41. package/src/imports.ts +42 -0
  42. package/src/index.ts +16 -0
  43. package/src/language.ts +116 -0
  44. package/src/node-at.ts +96 -0
  45. package/src/outline.ts +287 -0
  46. package/src/runtime.ts +237 -0
  47. package/src/session.ts +112 -0
  48. package/src/structure.ts +7 -0
  49. package/src/syntax-node.ts +13 -0
  50. package/src/tree-sitter.ts +306 -0
  51. package/src/types.ts +146 -0
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Vendors Tree-sitter grammar WASM files from their installed npm packages
4
+ * into the package's resources/ directory.
5
+ *
6
+ * Grammars whose npm packages ship pre-built WASM files are copied directly.
7
+ * Kotlin and SQL are handled by dedicated generator scripts because their
8
+ * npm packages do not include WASM files.
9
+ *
10
+ * Usage:
11
+ * node scripts/vendor-wasm.mjs # Copy WASM files
12
+ * node scripts/vendor-wasm.mjs --check # Verify checksums
13
+ */
14
+
15
+ import { createHash } from "node:crypto";
16
+ import * as fs from "node:fs";
17
+ import { createRequire } from "node:module";
18
+ import * as path from "node:path";
19
+ import { fileURLToPath } from "node:url";
20
+
21
// CommonJS-style resolver, used to locate installed grammar packages.
const require = createRequire(import.meta.url);

// Package-relative locations, derived from this script's own path.
const scriptPath = fileURLToPath(import.meta.url);
const scriptDir = path.dirname(scriptPath);
const packageRoot = path.resolve(scriptDir, "..");
const resourcesDir = path.join(packageRoot, "resources", "grammars");

// `--check` switches the script from copying files to verifying them.
const checkMode = process.argv.includes("--check");
26
+
27
/**
 * Build one GRAMMAR_SOURCES entry.
 *
 * @param {string} npmPackage npm package that ships the grammar.
 * @param {string} wasmFile   File name of the WASM artifact inside that package.
 */
const grammarSource = (npmPackage, wasmFile) => ({ npmPackage, wasmFile });

/**
 * Map: grammarId → { npmPackage, wasmFile }
 *
 * Lists only grammars whose npm packages ship a .wasm file. Kotlin and SQL
 * are absent on purpose: their packages do not include WASM, so they are
 * produced by generate-kotlin-wasm.mjs and generate-sql-wasm.mjs instead.
 */
const GRAMMAR_SOURCES = {
  javascript: grammarSource("tree-sitter-javascript", "tree-sitter-javascript.wasm"),
  typescript: grammarSource("tree-sitter-typescript", "tree-sitter-typescript.wasm"),
  tsx: grammarSource("tree-sitter-typescript", "tree-sitter-tsx.wasm"),
  python: grammarSource("tree-sitter-python", "tree-sitter-python.wasm"),
  rust: grammarSource("tree-sitter-rust", "tree-sitter-rust.wasm"),
  go: grammarSource("tree-sitter-go", "tree-sitter-go.wasm"),
  c: grammarSource("tree-sitter-c", "tree-sitter-c.wasm"),
  cpp: grammarSource("tree-sitter-cpp", "tree-sitter-cpp.wasm"),
  java: grammarSource("tree-sitter-java", "tree-sitter-java.wasm"),
  ruby: grammarSource("tree-sitter-ruby", "tree-sitter-ruby.wasm"),
  bash: grammarSource("tree-sitter-bash", "tree-sitter-bash.wasm"),
  html: grammarSource("tree-sitter-html", "tree-sitter-html.wasm"),
  r: grammarSource("@davisvaughan/tree-sitter-r", "tree-sitter-r.wasm"),
};
48
+
49
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * @param {string} filePath Path of the file to hash (read synchronously, in full).
 * @returns {string} Digest as a lowercase hex string.
 */
function sha256(filePath) {
  const hasher = createHash("sha256");
  hasher.update(fs.readFileSync(filePath));
  return hasher.digest("hex");
}
52
+
53
/**
 * Locate an installed npm package and load its manifest.
 *
 * @param {string} packageName Package to resolve via `<packageName>/package.json`.
 * @returns {{ dir: string, json: any }} The directory containing the manifest
 *   and the parsed manifest contents.
 */
function readPackage(packageName) {
  const manifestPath = require.resolve(`${packageName}/package.json`);
  const dir = path.dirname(manifestPath);
  const manifestText = fs.readFileSync(manifestPath, "utf-8");
  return { dir, json: JSON.parse(manifestText) };
}
60
+
61
/**
 * Copy one grammar's WASM file from its installed npm package into
 * resources/grammars/<grammarId>/ and write a metadata JSON file next to it.
 *
 * The metadata records the source package name, the installed package
 * version, and the SHA-256 of the copied file.
 *
 * @param {string} grammarId Sub-directory name under the resources directory.
 * @param {{ npmPackage: string, wasmFile: string }} source Where the WASM comes from.
 * @throws {Error} When the package does not contain the expected WASM file.
 */
function vendorGrammar(grammarId, source) {
  const { npmPackage, wasmFile } = source;
  const { dir: packageDir, json: manifest } = readPackage(npmPackage);
  const srcWasmPath = path.join(packageDir, wasmFile);

  if (!fs.existsSync(srcWasmPath)) {
    throw new Error(
      `WASM file not found in ${npmPackage} at ${srcWasmPath}. ` +
        `Ensure the package is installed (pnpm install) and ships a .wasm file.`,
    );
  }

  const grammarDir = path.join(resourcesDir, grammarId);
  const destWasmPath = path.join(grammarDir, wasmFile);
  const metadataPath = path.join(grammarDir, `${wasmFile}.json`);

  fs.mkdirSync(grammarDir, { recursive: true });
  fs.copyFileSync(srcWasmPath, destWasmPath);

  // Hash the copy that was written, not the source file.
  const checksum = sha256(destWasmPath);
  const version = manifest.version;
  const metadata = {
    source: { npmPackage, version },
    sha256: checksum,
  };
  fs.writeFileSync(metadataPath, `${JSON.stringify(metadata, null, 2)}\n`);

  process.stdout.write(`${grammarId}: ${wasmFile} (${npmPackage} ${version}, ${checksum})\n`);
}
93
+
94
/**
 * Verify one vendored grammar against its metadata and the installed package.
 *
 * Checks, in order: the vendored WASM exists, the metadata file exists, the
 * recorded package name matches, the recorded version matches the installed
 * package version, and the recorded SHA-256 matches the vendored file.
 *
 * @param {string} grammarId Sub-directory name under the resources directory.
 * @param {{ npmPackage: string, wasmFile: string }} source Expected origin of the WASM.
 * @returns {string[]} Human-readable problems; empty when everything matches.
 */
function checkGrammar(grammarId, source) {
  const grammarDir = path.join(resourcesDir, grammarId);
  const wasmPath = path.join(grammarDir, source.wasmFile);
  const metadataPath = path.join(grammarDir, `${source.wasmFile}.json`);

  // A missing file makes the remaining comparisons meaningless, so stop early.
  if (!fs.existsSync(wasmPath)) {
    return [`${grammarId}: missing vendored WASM at ${wasmPath}`];
  }
  if (!fs.existsSync(metadataPath)) {
    return [`${grammarId}: missing metadata at ${metadataPath}`];
  }

  const problems = [];
  try {
    const installed = readPackage(source.npmPackage).json;
    const metadata = JSON.parse(fs.readFileSync(metadataPath, "utf-8"));
    const actualSha = sha256(wasmPath);
    const recorded = metadata.source;

    if (recorded?.npmPackage !== source.npmPackage) {
      problems.push(`${grammarId}: metadata source npmPackage mismatch`);
    }
    if (recorded?.version !== installed.version) {
      problems.push(
        `${grammarId}: metadata version ${recorded?.version} !== installed ${installed.version}`,
      );
    }
    if (metadata.sha256 !== actualSha) {
      problems.push(`${grammarId}: metadata sha256 ${metadata.sha256} !== actual ${actualSha}`);
    }
  } catch (err) {
    // Resolution, read and parse failures are reported as check errors, not thrown.
    problems.push(`${grammarId}: ${err.message}`);
  }

  return problems;
}
131
+
132
// Main: either regenerate every vendored grammar, or (with --check) verify them all.
if (!checkMode) {
  for (const grammarId of Object.keys(GRAMMAR_SOURCES)) {
    vendorGrammar(grammarId, GRAMMAR_SOURCES[grammarId]);
  }
  process.stdout.write("All vendored WASM files generated.\n");
} else {
  // Collect every problem first so a single run reports all stale grammars.
  const allErrors = [];
  for (const grammarId of Object.keys(GRAMMAR_SOURCES)) {
    allErrors.push(...checkGrammar(grammarId, GRAMMAR_SOURCES[grammarId]));
  }

  if (allErrors.length > 0) {
    throw new Error(
      `Vendored WASM checks failed:\n- ${allErrors.join("\n- ")}\nRun: node scripts/vendor-wasm.mjs`,
    );
  }
  process.stdout.write("All vendored WASM files are current.\n");
}
package/src/callees.ts ADDED
@@ -0,0 +1,343 @@
1
+ // Structural callee extraction — enclosing-scope lookup with per-language queries.
2
+
3
+ import { detectGrammar } from "./language.ts";
4
+ import type { TreeSitterRuntime } from "./runtime.ts";
5
+ import type { GrammarId, SourceRange, TreeSitterResult } from "./types.ts";
6
+
7
/** A display name paired with the source range it was taken from. */
interface NamedSourceRange {
  name: string;
  range: SourceRange;
}

/** Result shape returned by lookupCalleesAt. */
export interface CalleesAtResult {
  /** The function or method that contains the requested position. */
  enclosingScope: NamedSourceRange;
  /** Calls found inside that scope. */
  callees: NamedSourceRange[];
}
18
+
19
+ // ── Per-grammar callee queries ────────────────────────────────────────
20
+
21
/** Query shared by the JavaScript-family grammars: plain calls plus `new` expressions. */
const JS_FAMILY_CALLEE_QUERY =
  "(call_expression function: (_) @callee) (new_expression constructor: (_) @callee)";

/** Query shared by grammars whose calls are a `call_expression` with a `function` field. */
const CALL_EXPRESSION_CALLEE_QUERY = "(call_expression function: (_) @callee)";

/** Query shared by grammars whose calls are a `call` node with a `function` field. */
const CALL_FUNCTION_CALLEE_QUERY = "(call function: (_) @callee)";

/**
 * Tree-sitter query per grammar; each query tags callee nodes as `@callee`.
 * Grammars without an entry do not support callee lookup.
 */
const CALLEE_QUERIES: Partial<Record<GrammarId, string>> = {
  javascript: JS_FAMILY_CALLEE_QUERY,
  typescript: JS_FAMILY_CALLEE_QUERY,
  tsx: JS_FAMILY_CALLEE_QUERY,
  python: CALL_FUNCTION_CALLEE_QUERY,
  rust: `${CALL_EXPRESSION_CALLEE_QUERY} (macro_invocation) @callee`,
  go: CALL_EXPRESSION_CALLEE_QUERY,
  c: CALL_EXPRESSION_CALLEE_QUERY,
  cpp: CALL_EXPRESSION_CALLEE_QUERY,
  java: "(method_invocation name: (_) @callee) (object_creation_expression type: (_) @callee)",
  kotlin: "(call_expression . (_) @callee)",
  ruby: "(call method: (_) @callee)",
  bash: "(command . (_) @callee)",
  r: CALL_FUNCTION_CALLEE_QUERY,
};
36
+
37
+ // ── Enclosing scope node types per grammar ────────────────────────────
38
+
39
/**
 * Node types treated as a function-like scope in the JavaScript-family
 * grammars. Generator declarations and expressions are included alongside
 * their plain counterparts so a position inside `function* gen() {}` still
 * resolves to an enclosing scope.
 */
const JS_FAMILY_SCOPE_TYPES: readonly string[] = [
  "function_declaration",
  "generator_function_declaration",
  "method_definition",
  "arrow_function",
  "function_expression",
  "generator_function",
];

/**
 * Node types that count as an enclosing function/method scope, per grammar.
 *
 * An empty set means the grammar has no function-like scope, so every lookup
 * reports that no enclosing scope was found. Type names a grammar does not
 * produce never match and are harmless.
 */
const ENCLOSING_SCOPE_TYPES: Record<GrammarId, ReadonlySet<string>> = {
  // Each JS-family grammar gets its own Set instance.
  javascript: new Set(JS_FAMILY_SCOPE_TYPES),
  typescript: new Set(JS_FAMILY_SCOPE_TYPES),
  tsx: new Set(JS_FAMILY_SCOPE_TYPES),
  python: new Set(["function_definition"]),
  rust: new Set(["function_item"]),
  // Go methods (`func (r T) Name()`) parse as `method_declaration`.
  go: new Set(["function_declaration", "method_declaration"]),
  c: new Set(["function_definition"]),
  cpp: new Set(["function_definition"]),
  // Java constructors parse as `constructor_declaration`.
  java: new Set(["method_declaration", "constructor_declaration"]),
  kotlin: new Set(["function_declaration"]),
  ruby: new Set(["method"]),
  bash: new Set(["function_definition"]),
  r: new Set(["function_definition"]),
  html: new Set(),
  sql: new Set(),
};
71
+
72
+ // ── Name extraction from enclosing scope text ─────────────────────────
73
+
74
/**
 * Extract a best-effort name from an enclosing scope node's source text.
 *
 * Only the first line of `text` is inspected. The patterns are heuristics
 * tried in order; when none matches, the scope is reported as "anonymous".
 *
 * @param type Tree-sitter node type of the scope. Used to recognise scopes
 *   that have no name of their own, so heuristics meant for other languages
 *   cannot misread their first line.
 * @param text Source text of the scope node.
 * @returns The extracted name, or "anonymous".
 */
function extractScopeName(type: string, text: string): string {
  const firstLine = text.split("\n")[0] ?? text;

  // Arrow functions are never named. Without this guard `async () => {` is
  // read as a Bash `name()` definition and `() => { foo(); }` takes the name
  // of its first call.
  if (type === "arrow_function") return "anonymous";

  // A function expression is named only when an identifier directly follows
  // its own `function` keyword. Without this guard `function () {` is read
  // as a Bash definition called "function".
  if (type === "function_expression") {
    const named = firstLine.match(/^(?:async\s+)?function\s*\*?\s*(\w+)/);
    return named?.[1] ?? "anonymous";
  }

  // Keyword-introduced definitions: `function foo`, `fn foo`, `func foo`,
  // `def foo` (covers JS/TS, Rust, Go, Python and Ruby).
  const keywordMatch = firstLine.match(/(?:function|fn|func|def|method)\s+(\w+)/);
  if (keywordMatch?.[1]) return keywordMatch[1];

  // Bash: `foo()`
  const bashMatch = firstLine.match(/^(\w+)\s*\(\)/);
  if (bashMatch?.[1]) return bashMatch[1];

  // Kotlin: `fun foo` or `fun Foo.bar()` — extract the last name part
  const kotlinMatch = firstLine.match(/fun\s+(?:[\w.]+\.)?(\w+)/);
  if (kotlinMatch?.[1]) return kotlinMatch[1];

  // C-like: `type name(params) {` — heuristic: the word right before `(`
  const cLikeMatch = firstLine.match(
    /(\w+)\s*\([^)]*\)\s*(?:const\s*)?(?:override\s*)?(?:final\s*)?[{;]/,
  );
  if (cLikeMatch?.[1]) return cLikeMatch[1];

  return "anonymous";
}
102
+
103
+ // ── Main entrypoint ──────────────────────────────────────────────────
104
+
105
+ /**
106
+ * Overview of `lookupCalleesAt` (defined further below): extracts structural callee calls for a file at the given position.
107
+ *
108
+ * 1. Parses the file with the Tree-sitter runtime.
109
+ * 2. Finds the enclosing function/method scope at the position.
110
+ * 3. Runs a grammar-specific callee query.
111
+ * 4. Filters to captures within the enclosing scope.
112
+ * 5. Deduplicates by name.
113
+ */
114
/**
 * Check that a callee lookup can proceed for the given file and position.
 *
 * @param filePath Path used to detect the grammar.
 * @param line 1-based line number.
 * @param character 1-based character column.
 * @returns `{ kind: "ok", grammarId }` when the coordinates are positive
 *   integers and the detected grammar has a callee query; otherwise a
 *   "validation-error" or "unsupported-language" result.
 */
function validateCalleeInput(
  filePath: string,
  line: number,
  character: number,
): { kind: "ok"; grammarId: GrammarId } | TreeSitterResult<CalleesAtResult> {
  const isOneBasedInteger = (value: number): boolean => Number.isInteger(value) && value >= 1;

  if (!isOneBasedInteger(line)) {
    return {
      kind: "validation-error" as const,
      message: "line must be a positive 1-based integer",
    };
  }
  if (!isOneBasedInteger(character)) {
    return {
      kind: "validation-error" as const,
      message: "character must be a positive 1-based integer",
    };
  }

  const grammarId = detectGrammar(filePath);

  // Unknown file type: no grammar at all.
  if (!grammarId) {
    return {
      kind: "unsupported-language" as const,
      file: filePath,
      message: `Unsupported file: ${filePath}`,
    };
  }

  // Known grammar, but no callee query is configured for it.
  if (!CALLEE_QUERIES[grammarId]) {
    return {
      kind: "unsupported-language" as const,
      file: filePath,
      message: `callees is not supported for ${grammarId} files`,
    };
  }

  return { kind: "ok", grammarId };
}
152
+
153
/**
 * Walk up from `node` through its `parent` links and return the first node
 * (the starting node included) whose type is one of `scopeTypes`.
 *
 * @returns The matching node, or null when the chain ends without a match.
 */
function findEnclosingScope(
  // biome-ignore lint/suspicious/noExplicitAny: tree-sitter SyntaxNode is complex
  node: any,
  scopeTypes: ReadonlySet<string>,
  // biome-ignore lint/suspicious/noExplicitAny: tree-sitter SyntaxNode is complex
): any | null {
  for (let candidate = node; candidate; candidate = candidate.parent) {
    if (scopeTypes.has(candidate.type)) return candidate;
  }
  return null;
}
167
+
168
/**
 * Look up the calls made inside the function or method enclosing a position.
 *
 * Steps: validate the 1-based coordinates and the file's grammar, parse the
 * file, walk up from the node at the position to the nearest scope node, run
 * the grammar's callee query over the file, and keep the captures that
 * `filterCalleeCaptures` accepts for that scope.
 *
 * @param runtime Tree-sitter runtime used to parse and query the file.
 * @param filePath File to inspect.
 * @param line 1-based line of the anchor position.
 * @param character 1-based character column of the anchor position.
 * @returns A "success" result with the enclosing scope and its callees; the
 *   validation or parse result when those steps fail; or a "runtime-error"
 *   when no node, scope, or query result is available.
 */
export async function lookupCalleesAt(
  runtime: TreeSitterRuntime,
  filePath: string,
  line: number,
  character: number,
): Promise<TreeSitterResult<CalleesAtResult>> {
  const checked = validateCalleeInput(filePath, line, character);
  if (checked.kind !== "ok") return checked;
  const grammarId = checked.grammarId;

  const parsed = await runtime.parseFile(filePath);
  if (parsed.kind !== "success") return parsed;
  const tree = parsed.data.tree;

  // The parsed tree is deleted on every exit path below.
  try {
    const scopeTypes = ENCLOSING_SCOPE_TYPES[grammarId];
    const anchor = tsPosition(line, character);

    const nodeAtAnchor = tree.rootNode.descendantForPosition(anchor);
    if (!nodeAtAnchor) {
      return {
        kind: "runtime-error",
        message: "No node found at the given position",
      };
    }

    const scopeNode = findEnclosingScope(nodeAtAnchor, scopeTypes);
    if (!scopeNode) {
      return {
        kind: "runtime-error",
        message: "No enclosing function or method found at the given position",
      };
    }

    // Validation already rejected grammars without a query; this check also
    // narrows the type for the call below.
    const calleeQuery = CALLEE_QUERIES[grammarId];
    if (!calleeQuery) {
      return {
        kind: "runtime-error",
        message: `No callee query configured for ${grammarId}`,
      };
    }

    const queried = await runtime.queryFile(filePath, calleeQuery);
    if (queried.kind !== "success") {
      return {
        kind: "runtime-error",
        message: "Callee query failed",
      };
    }

    const callees = filterCalleeCaptures(queried.data, scopeNode, scopeTypes, anchor.row);
    const range = nodeToSourceRange(scopeNode);
    const name = extractScopeName(scopeNode.type, scopeNode.text);

    return {
      kind: "success",
      data: {
        enclosingScope: { name, range },
        callees,
      },
    };
  } finally {
    tree.delete();
  }
}
240
+
241
/**
 * A 1-based source span. When the character bounds are absent the span is
 * treated as covering its first and last rows entirely.
 */
interface ScopeSpan {
  startRow: number;
  endRow: number;
  startCharacter?: number;
  endCharacter?: number;
}

/** Minimal node shape needed to compute spans and walk children. */
interface ScopeLikeNode {
  type: string;
  children: unknown[];
  startPosition: { row: number; column?: number };
  endPosition: { row: number; column?: number };
}

/**
 * Convert a node's 0-based start/end points into a 1-based span.
 * NOTE(review): assumes node columns use the same unit as the `SourceRange`
 * characters of the captures (as `nodeToSourceRange` in this file does) —
 * confirm against how the runtime builds capture ranges.
 */
function toScopeSpan(node: Pick<ScopeLikeNode, "startPosition" | "endPosition">): ScopeSpan {
  const { startPosition: start, endPosition: end } = node;
  return {
    startRow: start.row + 1,
    endRow: end.row + 1,
    startCharacter: start.column === undefined ? undefined : start.column + 1,
    endCharacter: end.column === undefined ? undefined : end.column + 1,
  };
}

/** True when `range` lies entirely inside `span`, comparing (line, character) pairs. */
function isRangeWithinSpan(range: SourceRange, span: ScopeSpan): boolean {
  const startsInside =
    range.startLine > span.startRow ||
    (range.startLine === span.startRow && range.startCharacter >= (span.startCharacter ?? 1));
  const endsInside =
    range.endLine < span.endRow ||
    (range.endLine === span.endRow &&
      range.endCharacter <= (span.endCharacter ?? Number.POSITIVE_INFINITY));
  return startsInside && endsInside;
}

/**
 * Recursively collect the spans of inner function/method scopes that do not
 * span the anchor row. Captures inside these spans are excluded so that
 * calls made by nested functions are not attributed to the parent scope.
 *
 * @param node Node whose descendants are searched (the node itself is not tested).
 * @param scopeTypes Node types that count as a function/method scope.
 * @param anchorRow 0-based row of the requested position.
 * @param ranges Output array; matching spans are appended in traversal order.
 */
function collectInnerScopes(
  node: ScopeLikeNode | null,
  scopeTypes: ReadonlySet<string>,
  anchorRow: number,
  ranges: ScopeSpan[],
): void {
  if (!node) return;
  for (const child of node.children ?? []) {
    const childNode = child as ScopeLikeNode;
    const spansAnchorRow =
      childNode.startPosition.row <= anchorRow && childNode.endPosition.row >= anchorRow;
    if (scopeTypes.has(childNode.type) && !spansAnchorRow) {
      ranges.push(toScopeSpan(childNode));
    }
    // Always descend: deeper scopes may exist under scope and non-scope nodes alike.
    collectInnerScopes(childNode, scopeTypes, anchorRow, ranges);
  }
}

/**
 * Filter query captures to those inside the enclosing scope, excluding any
 * that fall inside inner nested function scopes, and de-duplicate by name.
 *
 * Containment is tested on (line, character) positions rather than whole
 * rows. A call that merely shares a line with a one-line inner closure, e.g.
 * `arr.map` in `arr.map((x) => foo(x))`, therefore still belongs to the
 * enclosing scope, and a call outside the scope that shares its first or
 * last line is not included.
 *
 * @param captures Query captures with their 1-based ranges and source text.
 * @param enclosingNode Scope node whose calls are wanted.
 * @param scopeTypes Node types that count as a function/method scope.
 * @param anchorRow 0-based row of the requested position.
 * @returns Callees in capture order. Names have whitespace removed and are
 *   truncated to 60 characters; the first occurrence of each name wins.
 */
function filterCalleeCaptures(
  captures: Array<{ range: SourceRange; text: string }>,
  // biome-ignore lint/suspicious/noExplicitAny: tree-sitter SyntaxNode is complex
  enclosingNode: any,
  scopeTypes: ReadonlySet<string>,
  anchorRow: number,
): Array<{ name: string; range: SourceRange }> {
  const excludeSpans: ScopeSpan[] = [];
  collectInnerScopes(enclosingNode, scopeTypes, anchorRow, excludeSpans);

  const enclosingSpan = toScopeSpan(enclosingNode);
  const seen = new Set<string>();
  const callees: Array<{ name: string; range: SourceRange }> = [];

  for (const capture of captures) {
    // Only include captures within the enclosing scope
    if (!isRangeWithinSpan(capture.range, enclosingSpan)) continue;

    // Exclude captures that fall within inner nested function scopes
    if (excludeSpans.some((span) => isRangeWithinSpan(capture.range, span))) continue;

    const name = capture.text.replace(/\s+/g, "").slice(0, 60);
    if (name.length === 0 || seen.has(name)) continue;
    seen.add(name);

    callees.push({ name, range: capture.range });
  }

  return callees;
}
324
+
325
+ // ── Internal helpers ──────────────────────────────────────────────────
326
+
327
/**
 * Convert a tree-sitter node's 0-based start/end points into a 1-based
 * SourceRange by adding one to every row and column.
 */
function nodeToSourceRange(node: {
  startPosition: { row: number; column: number };
  endPosition: { row: number; column: number };
}): SourceRange {
  const { startPosition: start, endPosition: end } = node;
  const startLine = start.row + 1;
  const startCharacter = start.column + 1;
  const endLine = end.row + 1;
  const endCharacter = end.column + 1;
  return { startLine, startCharacter, endLine, endCharacter };
}
339
+
340
/**
 * Convert a 1-based (line, character) position to a tree-sitter 0-based
 * point by subtracting one from each component.
 */
function tsPosition(line: number, character: number): { row: number; column: number } {
  const row = line - 1;
  const column = character - 1;
  return { row, column };
}
@@ -0,0 +1,108 @@
1
+ // Coordinate conversion between public 1-based UTF-16 positions
2
+ // and Tree-sitter runtime byte/row/column positions.
3
+
4
+ import type { SourceRange } from "./types.ts";
5
+
6
+ const encoder = new TextEncoder();
7
+
8
/** A position in source text, as exposed by the public API. */
export interface PublicPoint {
  /** 1-based line number. */
  line: number;
  /** 1-based column, counted in UTF-16 code units. */
  character: number;
}
13
+
14
/**
 * Convert a 1-based (line, character) pair to a 0-based (row, column) pair
 * for Tree-sitter lookup.
 *
 * `character` is a UTF-16 code-unit column (editor/LSP convention). It is
 * clamped to the line and converted to a column by counting the UTF-8 bytes
 * of the text before it.
 *
 * @param line 1-based line number.
 * @param character 1-based UTF-16 column.
 * @param source Full source text the position refers to.
 * @returns `row` is always `line - 1`. `column` is the UTF-8 byte length of
 *   the line prefix, or 0 when the row does not address an existing line
 *   (past the end, negative, or non-integer) instead of throwing.
 */
export function publicToTreeSitter(
  line: number,
  character: number,
  source: string,
): { row: number; column: number } {
  const row = line - 1;
  // Rows that do not address a line are treated as an empty line.
  const lineText = splitSourceLines(source)[row] ?? "";
  // character is 1-based; clamp to line length
  const charIndex = Math.max(0, Math.min(character - 1, lineText.length));
  // Convert UTF-16 code unit index to byte offset
  const textBefore = lineText.substring(0, charIndex);
  const byteOffset = encoder.encode(textBefore).length;
  return { row, column: byteOffset };
}
36
+
37
/**
 * Convert a Tree-sitter (row, column) pair to a 1-based (line, character) pair.
 *
 * `column` is treated as a byte offset within the line and converted to a
 * UTF-16 code-unit index.
 *
 * @param row 0-based row.
 * @param column Byte offset within that row.
 * @param source Full source text the position refers to.
 * @returns `line` is always `row + 1`. `character` is 1 when the row does
 *   not address an existing line (past the end, negative, or non-integer)
 *   instead of throwing.
 */
export function treeSitterToPublic(row: number, column: number, source: string): PublicPoint {
  // Rows that do not address a line are treated as an empty line.
  const lineText = splitSourceLines(source)[row] ?? "";
  // Convert byte offset to UTF-16 code unit index
  const charIndex = byteToUtf16Index(lineText, column);
  return { line: row + 1, character: charIndex + 1 };
}
50
+
51
/**
 * Point-object form of `treeSitterToPublic`: unpacks `{row, column}` and
 * delegates, using `source` for the byte-to-UTF-16 conversion.
 */
export function tsPointToPublic(
  point: { row: number; column: number },
  source: string,
): PublicPoint {
  const { row, column } = point;
  return treeSitterToPublic(row, column, source);
}
61
+
62
/**
 * Convert a Tree-sitter node's start and end points into a public
 * SourceRange, translating each point with `tsPointToPublic`.
 */
export function nodeToRange(
  node: {
    startPosition: { row: number; column: number };
    endPosition: { row: number; column: number };
  },
  source: string,
): SourceRange {
  const { line: startLine, character: startCharacter } = tsPointToPublic(
    node.startPosition,
    source,
  );
  const { line: endLine, character: endCharacter } = tsPointToPublic(node.endPosition, source);
  return { startLine, startCharacter, endLine, endCharacter };
}
81
+
82
/**
 * Split source into logical lines. CRLF, lone CR and lone LF each end a
 * line, and no line-ending characters remain in the returned strings.
 */
export function splitSourceLines(source: string): string[] {
  // `\r\n` is listed first so a CRLF pair counts as one break, not two.
  return source.split(/\r\n|\r|\n/);
}
86
+
87
/**
 * Convert a UTF-8 byte offset within a line to a UTF-16 code unit index.
 *
 * Walks the line, adding up the UTF-8 size of each character, and returns
 * the index of the first code unit at which the running byte count has
 * reached `byteOffset`. An offset that falls inside a multi-byte character
 * resolves to the index just after that character; an offset past the end
 * resolves to `line.length`.
 *
 * Unpaired surrogates are counted as 3 bytes, matching `TextEncoder`, which
 * encodes them as U+FFFD. This keeps the function consistent with byte
 * offsets produced by encoding a line prefix.
 *
 * @param line Text of a single line, without its line ending.
 * @param byteOffset UTF-8 byte offset from the start of the line.
 * @returns 0-based UTF-16 code unit index.
 */
function byteToUtf16Index(line: string, byteOffset: number): number {
  let byteCount = 0;
  let index = 0;
  while (index < line.length) {
    if (byteCount >= byteOffset) return index;
    const unit = line.charCodeAt(index);
    if (unit <= 0x7f) {
      byteCount += 1;
      index += 1;
    } else if (unit <= 0x7ff) {
      byteCount += 2;
      index += 1;
    } else {
      const next = index + 1 < line.length ? line.charCodeAt(index + 1) : 0;
      const isPair = unit >= 0xd800 && unit <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
      if (isPair) {
        // Valid surrogate pair: 4 bytes in UTF-8, 2 UTF-16 code units.
        byteCount += 4;
        index += 2;
      } else {
        // Other BMP character, or a lone surrogate (encoded as U+FFFD).
        byteCount += 3;
        index += 1;
      }
    }
  }
  return line.length;
}