opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,670 @@
1
+ /**
2
+ * Code-map ingestion — an Aider-style "repo map": for each source
3
+ * file, the *signatures* of its top-level definitions (functions,
4
+ * classes, methods, types) with the bodies stripped. The agent gets
5
+ * the shape of the codebase without reading every file.
6
+ *
7
+ * This is the one part of the plugin that is NOT convention-free or
8
+ * dependency-light, and that is a deliberate, opt-in trade:
9
+ *
10
+ * - It needs `web-tree-sitter` (~290 KB) plus a vendored `.wasm`
11
+ * grammar per supported language (~10.3 MB was quoted for eleven grammars; thirteen are listed below now —
12
+ * C++ alone is 4.7 MB and TypeScript 2.3 MB). The rest of the
13
+ * plugin is a ~77 KB source drop with one tiny dependency; this
14
+ * feature is most of the install weight.
15
+ * - It is inherently language-aware: each grammar needs to know
16
+ * which node types are "definitions" (or selectors / keys /
17
+ * elements). That per-language table is the `LANG_SPECS` map
18
+ * below — contained, declarative, and the only place language
19
+ * knowledge lives.
20
+ *
21
+ * Because of that, code-map is gated behind `config.enableCodeMap`
22
+ * and defaults OFF. When disabled, none of this loads — `import()` of
23
+ * `web-tree-sitter` only happens inside `ingestCodeMap`. Languages we
24
+ * have no grammar for are simply skipped; the rest of the plugin is
25
+ * unaffected.
26
+ *
27
+ * Signatures are stored one `code-map` memory per file via
28
+ * `upsertBySubject`, so they're recallable, co-change-boosted, and
29
+ * token-budgeted like every other memory, and a re-scan replaces
30
+ * rather than accumulates.
31
+ */
32
+ import { readdir, readFile, stat } from "node:fs/promises";
33
+ import { extname, join } from "node:path";
34
+ import { fileURLToPath } from "node:url";
35
/** Memory category under which every code-map entry is stored. */
const CATEGORY = "code-map";
/**
 * Directory names the tree walk refuses to descend into: VCS metadata,
 * dependency caches, virtualenvs, build output and editor folders.
 */
const SKIP_DIRS = new Set(
    ".git node_modules .venv venv __pycache__ .tox dist build target vendor .next coverage .idea .vscode".split(" "),
);
53
/**
 * Per-language table, keyed by the language ids used in `EXT_TO_LANG`.
 * Each entry names the vendored grammar file (without `.wasm`), the
 * tree-sitter node types that count as an entry for that language, and
 * optionally an `extractor` selecting a structural extractor instead of
 * the default signature extractor.
 */
const LANG_SPECS = (() => {
    // Local shorthand so each row reads as a flat list of node types.
    const nodes = (...types) => new Set(types);
    return {
        javascript: {
            grammar: "tree-sitter-javascript",
            defNodes: nodes("function_declaration", "generator_function_declaration", "class_declaration", "method_definition"),
        },
        typescript: {
            grammar: "tree-sitter-typescript",
            defNodes: nodes("function_declaration", "generator_function_declaration", "class_declaration", "abstract_class_declaration", "method_definition", "interface_declaration", "type_alias_declaration", "enum_declaration"),
        },
        python: {
            grammar: "tree-sitter-python",
            defNodes: nodes("function_definition", "class_definition"),
        },
        go: {
            grammar: "tree-sitter-go",
            defNodes: nodes("function_declaration", "method_declaration", "type_declaration"),
        },
        rust: {
            grammar: "tree-sitter-rust",
            defNodes: nodes("function_item", "struct_item", "enum_item", "trait_item", "impl_item", "mod_item", "macro_definition", "type_item"),
        },
        java: {
            grammar: "tree-sitter-java",
            defNodes: nodes("class_declaration", "interface_declaration", "enum_declaration", "record_declaration", "annotation_type_declaration", "method_declaration", "constructor_declaration"),
        },
        c: {
            grammar: "tree-sitter-c",
            defNodes: nodes("function_definition", "struct_specifier", "union_specifier", "enum_specifier", "type_definition"),
        },
        cpp: {
            grammar: "tree-sitter-cpp",
            defNodes: nodes("function_definition", "class_specifier", "struct_specifier", "union_specifier", "enum_specifier", "namespace_definition", "template_declaration", "type_definition"),
        },
        css: {
            // For CSS an entry is a selector rule or an at-rule. Cutting the
            // node text at the first `{` leaves exactly the selector
            // (`.nav > li`, `@media (max-width: 600px)`), so CSS shares the
            // signature extractor with real code; only the node types differ.
            grammar: "tree-sitter-css",
            defNodes: nodes("rule_set", "media_statement", "keyframes_statement", "supports_statement"),
        },
        json: {
            // JSON has nothing to "define"; its shape is its top-level keys.
            // NOTE: the project-facts ingester already summarises recognised
            // JSON manifests by key — this extends the idea to every .json
            // file in the tree and accepts some overlap as the price.
            grammar: "tree-sitter-json",
            defNodes: nodes("pair"),
            extractor: "json-shape",
        },
        html: {
            // HTML's useful skeleton is the elements that carry an `id`
            // plus the major structural landmark tags.
            grammar: "tree-sitter-html",
            defNodes: nodes("element"),
            extractor: "html-skeleton",
        },
        csharp: {
            // Types (class/interface/struct/enum/record/delegate), namespace
            // containers, constructors and methods. Properties are left out
            // on purpose: they dominate DTO classes yet add little
            // navigation value beyond the type that contains them.
            grammar: "tree-sitter-c_sharp",
            defNodes: nodes("class_declaration", "interface_declaration", "struct_declaration", "enum_declaration", "record_declaration", "delegate_declaration", "namespace_declaration", "file_scoped_namespace_declaration", "constructor_declaration", "method_declaration", "operator_declaration", "event_declaration"),
        },
        php: {
            // Functions, methods and every type-like construct. Namespaces
            // are included because they reveal where the file sits in the
            // package tree — one of the most useful navigation facts.
            grammar: "tree-sitter-php",
            defNodes: nodes("function_definition", "method_declaration", "class_declaration", "interface_declaration", "trait_declaration", "enum_declaration", "namespace_definition"),
        },
    };
})();
197
/**
 * File extension (lower-case, with the leading dot) → `LANG_SPECS` key.
 * Written grouped by language and flattened into a plain lookup object.
 */
const EXT_TO_LANG = Object.fromEntries([
    ["javascript", [".js", ".jsx", ".mjs", ".cjs"]],
    ["typescript", [".ts", ".tsx", ".mts", ".cts"]],
    ["python", [".py", ".pyi"]],
    ["go", [".go"]],
    ["rust", [".rs"]],
    ["java", [".java"]],
    // C vs C++: `.h` is ambiguous. By convention a plain `.h` is treated
    // as C and only the C++-specific header suffixes map to cpp.
    // tree-sitter tolerates errors, so a C++ `.h` parsed as C still
    // yields most of its signatures.
    ["c", [".c", ".h"]],
    ["cpp", [".cc", ".cpp", ".cxx", ".c++", ".hpp", ".hxx", ".hh", ".h++"]],
    ["css", [".css"]],
    ["json", [".json", ".jsonc"]],
    ["html", [".html", ".htm"]],
    ["csharp", [".cs"]],
    ["php", [".php", ".phtml"]],
].flatMap(([lang, extensions]) => extensions.map((ext) => [ext, lang])));
233
/** Size ceiling for a parsed file — bigger files (usually generated) are just noise. */
const MAX_FILE_BYTES = 1024 * 400;
/** Fallback file budget for one scan; adaptive config overrides it per repo size. */
const DEFAULT_MAX_FILES = 4000;
/** Upper bound on entries listed in one file's memory, to keep it compact. */
const MAX_SIGS_PER_FILE = 40;
/** Memoised promise for the shared tree-sitter engine; `null` until first requested. */
let enginePromise = null;
240
/**
 * Load `web-tree-sitter` on demand and assemble the engine object.
 *
 * @param packageDir Optional package root, forwarded to `resolveGrammarsDir`.
 * @returns `{ ParserClass, getLanguage }` on success, or
 *   `{ unavailableReason }` when the module cannot be imported,
 *   initialised, or exposes no `Language.load`. `getLanguage(lang)`
 *   resolves to the loaded grammar, or `null` when the language is
 *   unknown or its `.wasm` fails to load; either outcome is cached.
 */
async function buildEngine(packageDir) {
    // web-tree-sitter is heavy and optional, so it is imported only here,
    // i.e. only once code-map actually runs.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let ParserClass;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let LanguageClass;
    try {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const treeSitter = await import("web-tree-sitter");
        // 0.25.x ships `Parser` and `Language` as named exports and moved
        // grammar loading from `Parser.Language.load` to `Language.load`.
        // Older layouts are tried in turn so a version bump in either
        // direction still degrades gracefully.
        ParserClass =
            treeSitter.Parser ?? treeSitter.default?.Parser ?? treeSitter.default ?? treeSitter;
        LanguageClass =
            treeSitter.Language ?? treeSitter.default?.Language ?? ParserClass?.Language;
        await ParserClass.init();
        if (typeof LanguageClass?.load !== "function") {
            throw new Error("web-tree-sitter: no Language.load entry point");
        }
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        return { unavailableReason: "web-tree-sitter unavailable: " + detail };
    }
    const grammarsDir = resolveGrammarsDir(packageDir);
    // lang id → loaded grammar, or null for "tried and unusable".
    const grammarByLang = new Map();
    const getLanguage = async (lang) => {
        if (grammarByLang.has(lang)) {
            return grammarByLang.get(lang) ?? null;
        }
        const spec = LANG_SPECS[lang];
        let grammar = null;
        if (spec) {
            try {
                grammar = await LanguageClass.load(join(grammarsDir, `${spec.grammar}.wasm`));
            }
            catch {
                grammar = null; // grammar missing/incompatible — skip this language
            }
        }
        grammarByLang.set(lang, grammar);
        return grammar;
    };
    return { ParserClass, getLanguage };
}
290
/**
 * Return the shared tree-sitter engine, starting the build on first use.
 * A successful build stays memoised in `enginePromise`; an unavailable
 * result clears it so a later call makes a fresh attempt.
 */
async function getEngine(packageDir) {
    if (enginePromise === null) {
        enginePromise = buildEngine(packageDir);
    }
    const built = await enginePromise;
    const failed = "unavailableReason" in built;
    if (failed) {
        // Treat the failure as transient: do not cache it.
        enginePromise = null;
    }
    return built;
}
300
/**
 * Parse one source file and upsert its `code-map` memory. Shared by the
 * full prefill walk and the per-file live refresh, so both produce the
 * same memory (same subject, same body shape) — which is what lets
 * `upsertBySubject` cleanly replace a stale entry.
 *
 * Outcomes, all reported through `result` rather than a return value:
 *  - not a regular file, larger than `MAX_FILE_BYTES`, or no grammar for
 *    `lang`: `filesSkippedUnsupported` is incremented;
 *  - `stat`/read failure, or parser/extractor failure: silently ignored,
 *    counters untouched;
 *  - success: `filesParsed` and `signaturesExtracted` are updated, `lang`
 *    is added to `languagesSeen`, and the memory is upserted.
 *
 * @param repo Store exposing `upsertBySubject`.
 * @param root Scan root; stripped from `path` to form the memory subject.
 * @param path Absolute path of the file to parse.
 * @param lang Key into `LANG_SPECS`.
 * @param parser tree-sitter parser instance (re-targeted per call).
 * @param getLanguage Async loader resolving a grammar or `null`.
 * @param result Mutable counters object.
 */
async function parseAndStoreFile(repo, root, path, lang,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
parser, getLanguage, result) {
    let src;
    try {
        const s = await stat(path);
        if (!s.isFile() || s.size > MAX_FILE_BYTES) {
            result.filesSkippedUnsupported += 1;
            return;
        }
        src = await readFile(path, "utf-8");
    }
    catch {
        return;
    }
    const L = await getLanguage(lang);
    if (!L) {
        result.filesSkippedUnsupported += 1;
        return;
    }
    const spec = LANG_SPECS[lang];
    let items;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let tree = null;
    try {
        parser.setLanguage(L);
        tree = parser.parse(src);
        const extractor = spec.extractor ?? "signatures";
        if (extractor === "json-shape") {
            items = extractJsonShape(tree.rootNode, src);
        }
        else if (extractor === "html-skeleton") {
            items = extractHtmlSkeleton(tree.rootNode, src);
        }
        else {
            items = extractSignatures(tree.rootNode, src, spec.defNodes);
        }
    }
    catch {
        return;
    }
    finally {
        // A web-tree-sitter Tree lives on the WASM heap and is not garbage
        // collected; without an explicit delete() every parsed file leaks.
        // The extracted items are plain strings, so the tree is no longer
        // needed. Best-effort: a failing delete must not break the
        // "never throws" contract.
        try {
            tree?.delete?.();
        }
        catch {
            /* nothing useful to do if freeing fails */
        }
    }
    if (!result.languagesSeen.includes(lang))
        result.languagesSeen.push(lang);
    result.filesParsed += 1;
    result.signaturesExtracted += items.length;
    // The noun in the summary line depends on what was extracted —
    // "definitions" for code, "selectors" for CSS, "keys" for JSON,
    // "elements" for HTML — so the agent reads it correctly.
    const noun = lang === "css"
        ? "selector"
        : lang === "json"
            ? "top-level key"
            : lang === "html"
                ? "landmark element"
                : "definition";
    // Subject is the path relative to `root` (plain prefix strip); a path
    // outside `root` is kept as given.
    const rel = path.startsWith(root) ? path.slice(root.length).replace(/^\/+/, "") : path;
    const shown = items.slice(0, MAX_SIGS_PER_FILE);
    const body = shown.length === 0
        ? `${rel} (${lang}): no ${noun}s found.`
        : `${rel} (${lang}) — ${items.length} ${noun}${items.length === 1 ? "" : "s"}: ` +
            shown.join(" · ") +
            (items.length > shown.length ? ` … (+${items.length - shown.length} more)` : "");
    repo.upsertBySubject({
        category: CATEGORY,
        subject: rel,
        content: body,
        tags: ["code-map", lang, rel],
        source: "tree-sitter:code-map",
    });
}
375
/**
 * Re-index the code-map for a SINGLE file. This is what keeps the index
 * honest when the agent edits code mid-session: the edited file's stale
 * signature memory is replaced (via `upsertBySubject`) with one parsed
 * from the file as it is now. Reuses the cached engine, so after the
 * initial prefill a refresh is just a one-file parse. Never throws.
 *
 * Returns:
 *  - "updated"     — the file was parsed and its memory upserted;
 *  - "unsupported" — the extension has no language mapping, or
 *                    `parseAndStoreFile` did not parse the file (too
 *                    large, not a regular file, grammar missing, or the
 *                    file could not be read/parsed — it swallows those);
 *  - "unavailable" — tree-sitter could not be loaded;
 *  - "error"       — the engine lookup, parser construction, or
 *                    `parseAndStoreFile` itself threw.
 *
 * @param repo Store passed through to `parseAndStoreFile`.
 * @param root Scan root used to derive the memory subject.
 * @param absPath Absolute path of the file to refresh.
 * @param packageDir Optional package root used to locate grammars.
 */
export async function ingestCodeMapForFile(repo, root, absPath, packageDir) {
    const lang = EXT_TO_LANG[extname(absPath).toLowerCase()];
    if (!lang)
        return "unsupported";
    let engine;
    try {
        engine = await getEngine(packageDir);
    }
    catch {
        return "error";
    }
    if ("unavailableReason" in engine)
        return "unavailable";
    const result = {
        filesParsed: 0,
        filesSkippedUnsupported: 0,
        signaturesExtracted: 0,
        languagesSeen: [],
    };
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let parser = null;
    try {
        parser = new engine.ParserClass();
        await parseAndStoreFile(repo, root, absPath, lang, parser, engine.getLanguage, result);
    }
    catch {
        return "error";
    }
    finally {
        // A parser is created per refresh and holds WASM memory that the
        // garbage collector never reclaims; release it explicitly so a
        // long editing session does not leak one parser per edit.
        try {
            parser?.delete?.();
        }
        catch {
            /* best-effort cleanup — keep the never-throws contract */
        }
    }
    return result.filesParsed > 0 ? "updated" : "unsupported";
}
414
/**
 * Full prefill: walk `root` depth-first and store one `code-map` memory
 * per file whose extension appears in `EXT_TO_LANG`.
 *
 * Directories listed in `SKIP_DIRS`, and any directory whose name starts
 * with ".", are not entered; unreadable directories are skipped. The
 * walk stops once `maxFiles` files with a supported extension have been
 * handed to `parseAndStoreFile` (counted whether or not they parse).
 *
 * @param repo Store exposing `upsertBySubject` and `setIngestedAt`.
 * @param root Directory to scan.
 * @param packageDir Optional package root used to locate grammars.
 * @param maxFiles Cap on supported files visited; defaults to `DEFAULT_MAX_FILES`.
 * @returns Counters `{ filesParsed, filesSkippedUnsupported,
 *   signaturesExtracted, languagesSeen }` (languages sorted). When
 *   tree-sitter cannot be loaded, the counters stay zero,
 *   `unavailableReason` is set, and the ingest timestamp is NOT updated.
 */
export async function ingestCodeMap(repo, root, packageDir, maxFiles = DEFAULT_MAX_FILES) {
    const result = {
        filesParsed: 0,
        filesSkippedUnsupported: 0,
        signaturesExtracted: 0,
        languagesSeen: [],
    };
    const engine = await getEngine(packageDir);
    if ("unavailableReason" in engine) {
        result.unavailableReason = engine.unavailableReason;
        return result;
    }
    const eng = engine; // narrowed — closures below need the non-union type
    const parser = new eng.ParserClass();
    let filesVisited = 0;
    async function walk(dir) {
        if (filesVisited >= maxFiles)
            return;
        let entries;
        try {
            entries = await readdir(dir, { withFileTypes: true });
        }
        catch {
            return;
        }
        for (const e of entries) {
            if (filesVisited >= maxFiles)
                return;
            if (e.isDirectory()) {
                if (SKIP_DIRS.has(e.name) || e.name.startsWith("."))
                    continue;
                await walk(join(dir, e.name));
            }
            else if (e.isFile()) {
                const lang = EXT_TO_LANG[extname(e.name).toLowerCase()];
                if (!lang)
                    continue;
                filesVisited += 1;
                await parseAndStoreFile(repo, root, join(dir, e.name), lang, parser, eng.getLanguage, result);
            }
        }
    }
    try {
        await walk(root);
    }
    finally {
        // The parser holds WASM memory the garbage collector never
        // reclaims; release it once the scan is over, even if it aborted.
        try {
            parser?.delete?.();
        }
        catch {
            /* best-effort cleanup */
        }
    }
    result.languagesSeen.sort();
    repo.setIngestedAt(CATEGORY, Date.now());
    return result;
}
461
+ /* ─── signature extraction ──────────────────────────────────────────── */
462
/**
 * Depth-first, pre-order walk collecting one signature line per
 * definition node. A "signature" is the node's source text up to (but
 * not including) its body — i.e. up to the first `{` or the first
 * newline, whichever comes first — trimmed and length-capped (see
 * `signatureOf`). That captures `func (s *Server) Start() error`,
 * `def parse(self, text):`, `interface Config`, etc. without any of the
 * body. Pure function of (tree, source) so it is unit-testable without
 * a repo.
 *
 * The walk uses an explicit stack instead of recursion: a deeply nested
 * tree would otherwise exhaust the call stack and the whole file would
 * be dropped by the caller's catch-all.
 *
 * @param rootNode Root of the syntax tree (anything with `type`,
 *   `childCount` and `child(i)`); a null root yields `[]`.
 * @param src Source text the node offsets refer to.
 * @param defNodes Set of node types that count as definitions.
 * @returns Unique signatures in document order.
 */
export function extractSignatures(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
rootNode, src, defNodes) {
    const out = [];
    const seen = new Set();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const pending = [rootNode];
    while (pending.length > 0) {
        const node = pending.pop();
        if (!node)
            continue; // child(i) may hand back null — nothing to inspect
        if (defNodes.has(node.type)) {
            const sig = signatureOf(node, src);
            if (sig && !seen.has(sig)) {
                seen.add(sig);
                out.push(sig);
            }
        }
        // Push children right-to-left so they pop left-to-right, which
        // keeps the same pre-order as a recursive descent.
        for (let i = node.childCount - 1; i >= 0; i--) {
            pending.push(node.child(i));
        }
    }
    return out;
}
492
// Matches a line consisting of nothing but a C# attribute (`[Obsolete]`),
// a Rust attribute (`#[derive(...)]`), or a Java/Python
// annotation/decorator (`@Override`, `@staticmethod`). Anchoring to the
// whole line (`^…$`) is essential: an inline form such as
// `[Foo] public void Bar()` must not be treated as metadata and skipped.
const ATTRIBUTE_LINE_RE = /^(#?\[.*\]|@[\w.]+(\s*\([^)]*\))?)$/;
/**
 * Reduce a definition node to a one-line signature: skip leading blank
 * and attribute-only lines, keep the first remaining line up to its
 * first `{`, collapse whitespace, and cap the length. Returns `null`
 * when nothing is left.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function signatureOf(node, src) {
    const lines = src.slice(node.startIndex, node.endIndex).split("\n");
    // A definition node often starts with attribute/annotation lines
    // (pervasive in C#, common in Java, always on their own line in
    // Python). Step over them so the signature is the declaration itself
    // rather than `[DebuggerStepThrough]`. The last line is never
    // skipped, so there is always a line left to use.
    const lastIndex = lines.length - 1;
    let first = 0;
    for (; first < lastIndex; first += 1) {
        const trimmed = lines[first].trim();
        if (trimmed !== "" && !ATTRIBUTE_LINE_RE.test(trimmed)) {
            break;
        }
    }
    // The body starts at the first `{` or at the end of this line,
    // whichever comes first.
    const headLine = lines[first];
    const bracePos = headLine.indexOf("{");
    const declaration = bracePos === -1 ? headLine : headLine.slice(0, bracePos);
    // A trailing `:` (Python defs) survives; inner whitespace is collapsed.
    const compact = declaration.trim().replace(/\s+/g, " ");
    if (compact.length === 0) {
        return null;
    }
    return compact.length > 140 ? compact.slice(0, 137) + "…" : compact;
}
530
+ /* ─── structural extractors for non-code formats ────────────────────── */
531
/**
 * JSON "shape": only the TOP-LEVEL keys of the root object, or a single
 * bracketed marker when the root is an array or some other node type.
 * Walking the whole tree would list every nested key, which is noise,
 * so this looks exactly one object level deep. Pure function of
 * (tree, source); unit-testable without a repo.
 */
export function extractJsonShape(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
rootNode, src) {
    // Materialise a node's children as an array (entries may be null).
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const childrenOf = (node) => Array.from({ length: node.childCount }, (_, i) => node.child(i));
    // tree-sitter-json wraps the value in a `document` node: unwrap to
    // its first non-comment child, if there is one.
    let value = rootNode;
    if (value && value.type === "document" && value.childCount > 0) {
        value = childrenOf(value).find((child) => child && child.type !== "comment") ?? value;
    }
    if (!value) {
        return [];
    }
    if (value.type === "array") {
        return ["[root is a JSON array]"];
    }
    if (value.type !== "object") {
        return [`[root is a JSON ${value.type}]`];
    }
    const keys = [];
    for (const member of childrenOf(value)) {
        if (member?.type !== "pair") {
            continue;
        }
        // A `pair` is `key : value`; the key is its first `string` child.
        const keyNode = childrenOf(member).find((part) => part?.type === "string");
        if (!keyNode) {
            continue;
        }
        const name = src.slice(keyNode.startIndex, keyNode.endIndex).replace(/^["']|["']$/g, "");
        if (name) {
            keys.push(name);
        }
    }
    return keys;
}
577
/** Structural landmark tags that are listed even when they carry no `id`. */
const HTML_LANDMARK_TAGS = new Set([
    "header", "footer", "main", "nav", "aside", "section", "article",
    "form", "table", "dialog", "template",
]);
/**
 * HTML "skeleton": elements that carry an `id` (rendered `tag#id`) and
 * the major structural landmark tags (`header`, `main`, `nav`, `form`,
 * `section`, …, rendered `<tag>`). A flat list of every element would be
 * noise; this is the part of the document an agent actually navigates
 * by. Pure function of (tree, source).
 *
 * Entries are unique and in document (pre-order) order. The walk uses an
 * explicit stack instead of recursion so that very deep nesting cannot
 * exhaust the call stack.
 *
 * @param rootNode Root of the syntax tree; a null root yields `[]`.
 * @param src Source text the node offsets refer to.
 */
export function extractHtmlSkeleton(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
rootNode, src) {
    const out = [];
    const seen = new Set();
    /** Skeleton entry for one element node, or null if it is not listed. */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    function entryFor(element) {
        // The start tag holds the tag name and attributes.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        let startTag = null;
        for (let i = 0; i < element.childCount; i++) {
            const c = element.child(i);
            if (c && (c.type === "start_tag" || c.type === "self_closing_tag")) {
                startTag = c;
                break;
            }
        }
        if (!startTag)
            return null;
        let tagName = "";
        let idValue = null;
        for (let i = 0; i < startTag.childCount; i++) {
            const c = startTag.child(i);
            if (!c)
                continue;
            if (c.type === "tag_name") {
                tagName = src.slice(c.startIndex, c.endIndex);
            }
            else if (c.type === "attribute") {
                // attribute = attribute_name [= (quoted_)attribute_value]
                let attrName = "";
                let attrVal = "";
                for (let j = 0; j < c.childCount; j++) {
                    const a = c.child(j);
                    if (!a)
                        continue;
                    if (a.type === "attribute_name") {
                        attrName = src.slice(a.startIndex, a.endIndex);
                    }
                    else if (a.type === "quoted_attribute_value" || a.type === "attribute_value") {
                        attrVal = src.slice(a.startIndex, a.endIndex).replace(/^["']|["']$/g, "");
                    }
                }
                if (attrName.toLowerCase() === "id" && attrVal)
                    idValue = attrVal;
            }
        }
        if (!tagName)
            return null;
        if (idValue)
            return `${tagName}#${idValue}`;
        return HTML_LANDMARK_TAGS.has(tagName.toLowerCase()) ? `<${tagName}>` : null;
    }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const pending = [rootNode];
    while (pending.length > 0) {
        const node = pending.pop();
        if (!node)
            continue; // child(i) may hand back null
        if (node.type === "element" || node.type === "script_element" || node.type === "style_element") {
            const entry = entryFor(node);
            if (entry && !seen.has(entry)) {
                seen.add(entry);
                out.push(entry);
            }
        }
        // Push children right-to-left so they pop left-to-right, which
        // keeps the same pre-order as a recursive descent.
        for (let i = node.childCount - 1; i >= 0; i--) {
            pending.push(node.child(i));
        }
    }
    return out;
}
654
+ /* ─── grammar path resolution ───────────────────────────────────────── */
655
/**
 * Work out where the vendored `grammars/` directory lives. An explicit
 * package directory wins. Without one, the path is derived from this
 * compiled module's own location (`dist/ingest/code-map.js` → package
 * root → `grammars`); if that lookup throws, the current working
 * directory is used instead.
 */
function resolveGrammarsDir(packageDir) {
    if (!packageDir) {
        try {
            // `modulePath` is the FILE, so three `..` climb file → ingest → dist → root.
            const modulePath = fileURLToPath(import.meta.url); // .../dist/ingest/code-map.js
            const upToPackageRoot = ["..", "..", ".."];
            return join(modulePath, ...upToPackageRoot, "grammars");
        }
        catch {
            return join(process.cwd(), "grammars");
        }
    }
    return join(packageDir, "grammars");
}