codebase-index 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. codebase_index/__init__.py +7 -0
  2. codebase_index/__main__.py +3 -0
  3. codebase_index/cli.py +916 -0
  4. codebase_index/config.py +110 -0
  5. codebase_index/discovery/__init__.py +10 -0
  6. codebase_index/discovery/classify.py +151 -0
  7. codebase_index/discovery/ignore.py +58 -0
  8. codebase_index/discovery/walker.py +75 -0
  9. codebase_index/doctor.py +138 -0
  10. codebase_index/embeddings/__init__.py +2 -0
  11. codebase_index/embeddings/backend.py +67 -0
  12. codebase_index/embeddings/external.py +56 -0
  13. codebase_index/embeddings/local.py +41 -0
  14. codebase_index/embeddings/noop.py +15 -0
  15. codebase_index/graph/__init__.py +8 -0
  16. codebase_index/graph/analysis.py +468 -0
  17. codebase_index/graph/builder.py +160 -0
  18. codebase_index/graph/expand.py +136 -0
  19. codebase_index/graph/export.py +381 -0
  20. codebase_index/graph/navigate.py +201 -0
  21. codebase_index/indexer/__init__.py +8 -0
  22. codebase_index/indexer/doc_chunks.py +202 -0
  23. codebase_index/indexer/freshness.py +109 -0
  24. codebase_index/indexer/pipeline.py +423 -0
  25. codebase_index/mcp/__init__.py +2 -0
  26. codebase_index/mcp/server.py +354 -0
  27. codebase_index/models.py +145 -0
  28. codebase_index/output/__init__.py +6 -0
  29. codebase_index/output/json.py +13 -0
  30. codebase_index/output/markdown.py +316 -0
  31. codebase_index/output/redact.py +31 -0
  32. codebase_index/parsers/__init__.py +9 -0
  33. codebase_index/parsers/base.py +47 -0
  34. codebase_index/parsers/languages.py +290 -0
  35. codebase_index/parsers/line_chunker.py +39 -0
  36. codebase_index/parsers/symbol_chunks.py +62 -0
  37. codebase_index/parsers/treesitter.py +439 -0
  38. codebase_index/retrieval/__init__.py +9 -0
  39. codebase_index/retrieval/budget.py +82 -0
  40. codebase_index/retrieval/fusion.py +62 -0
  41. codebase_index/retrieval/intent.py +56 -0
  42. codebase_index/retrieval/pipeline.py +207 -0
  43. codebase_index/retrieval/rerank.py +69 -0
  44. codebase_index/retrieval/searchers.py +291 -0
  45. codebase_index/retrieval/skeleton.py +251 -0
  46. codebase_index/retrieval/types.py +79 -0
  47. codebase_index/scaffold.py +399 -0
  48. codebase_index/service.py +158 -0
  49. codebase_index/skill_template/SKILL.md +198 -0
  50. codebase_index/skill_template/examples/hooks/settings.json +16 -0
  51. codebase_index/skill_template/scripts/cbx +25 -0
  52. codebase_index/skill_template/scripts/cbx.ps1 +25 -0
  53. codebase_index/skill_update.py +150 -0
  54. codebase_index/storage/__init__.py +8 -0
  55. codebase_index/storage/db.py +116 -0
  56. codebase_index/storage/repo.py +701 -0
  57. codebase_index/storage/schema.sql +125 -0
  58. codebase_index/watch/__init__.py +5 -0
  59. codebase_index/watch/watcher.py +93 -0
  60. codebase_index-1.6.0.dist-info/METADATA +748 -0
  61. codebase_index-1.6.0.dist-info/RECORD +64 -0
  62. codebase_index-1.6.0.dist-info/WHEEL +4 -0
  63. codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
  64. codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,316 @@
1
+ """Compact Markdown renderer for SearchResponse and dict payloads."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ from ..models import ImpactResponse, RefsResponse, SearchResponse, SymbolResponse
8
+
9
+
10
def render(resp: SearchResponse | dict) -> str:
    """Render a search result as compact Markdown.

    A plain ``dict`` payload is handed to ``_render_dict``; any other value is
    treated as a ``SearchResponse`` and handed to ``_render_search_response``.
    """
    renderer = _render_dict if isinstance(resp, dict) else _render_search_response
    return renderer(resp)
14
+
15
+
16
+ def _render_dict(payload: dict) -> str:
17
+ lines: list[str] = []
18
+ lines.append(f"**Query:** {payload['query']} ")
19
+ lines.append(
20
+ f"**Intent:** `{payload['intent']}` · **Confidence:** {payload['confidence']}\n"
21
+ )
22
+
23
+ if payload["results"]:
24
+ lines.append("| # | Path | Lines | Reason |")
25
+ lines.append("|---|------|-------|--------|")
26
+ for r in payload["results"]:
27
+ lines.append(
28
+ f"| {r['rank']} | `{r['path']}` | {r['line_start']}-{r['line_end']} "
29
+ f"| {r.get('reason', '')} |"
30
+ )
31
+ lines.append("")
32
+ for r in payload["results"]:
33
+ if r.get("snippet"):
34
+ lines.append(f"`{r['path']}:{r['line_start']}-{r['line_end']}`")
35
+ lines.append("```")
36
+ lines.append(r["snippet"])
37
+ lines.append("```")
38
+
39
+ if payload["recommended_reads"]:
40
+ lines.append("\n**Recommended reads:**")
41
+ for rr in payload["recommended_reads"]:
42
+ lines.append(f"- `{rr['path']}:{rr['line_start']}-{rr['line_end']}`")
43
+
44
+ fb = payload.get("fallback_suggestions", {}).get("ripgrep")
45
+ if fb:
46
+ lines.append("\n**Fallback (low confidence) — try:**")
47
+ for cmd in fb:
48
+ lines.append(f"- `{cmd}`")
49
+
50
+ pg = payload.get("pagination")
51
+ if pg:
52
+ shown = f"results {pg['offset'] + 1}–{pg['offset'] + len(payload['results'])}"
53
+ if pg.get("has_more"):
54
+ lines.append(f"\n_Showing {shown}; more available — `--offset {pg['next_offset']}`._")
55
+ else:
56
+ lines.append(f"\n_Showing {shown} (end of results)._")
57
+
58
+ return "\n".join(lines)
59
+
60
+
61
+ def _render_search_response(resp: SearchResponse) -> str:
62
+ lines: list[str] = []
63
+ freshness = "fresh" if not resp.index.stale else "STALE"
64
+ if not resp.index.exists:
65
+ freshness = "NO INDEX"
66
+ lines.append(
67
+ f"**query:** {resp.query} | **intent:** {resp.intent} | "
68
+ f"**confidence:** {resp.confidence} | **index:** {freshness}"
69
+ )
70
+ lines.append("")
71
+
72
+ if resp.results:
73
+ lines.append("| # | path | lines | reason |")
74
+ lines.append("|---|------|-------|--------|")
75
+ for result in resp.results:
76
+ symbols = f" `{','.join(result.symbols)}`" if result.symbols else ""
77
+ lines.append(
78
+ f"| {result.rank} | `{result.path}`{symbols} | "
79
+ f"{result.line_start}-{result.line_end} | {result.reason} |"
80
+ )
81
+ lines.append("")
82
+ for result in resp.results:
83
+ if result.snippet:
84
+ lines.append(f"`{result.path}:{result.line_start}-{result.line_end}`")
85
+ lines.append("```")
86
+ lines.append(result.snippet)
87
+ lines.append("```")
88
+ lines.append("")
89
+ else:
90
+ lines.append("_No index matches._")
91
+ lines.append("")
92
+
93
+ if resp.recommended_reads:
94
+ lines.append("**recommended reads:**")
95
+ for read in resp.recommended_reads:
96
+ lines.append(f"- `{read.path}:{read.line_start}-{read.line_end}`")
97
+ lines.append("")
98
+
99
+ if resp.fallback_suggestions:
100
+ lines.append("**fallback:**")
101
+ for commands in resp.fallback_suggestions.values():
102
+ for command in commands:
103
+ lines.append(f"- `{command}`")
104
+
105
+ return "\n".join(lines).rstrip() + "\n"
106
+
107
+
108
def render_symbols(resp: SymbolResponse) -> str:
    """Render symbol definitions as a Markdown table under a query/index header.

    Each row shows the qualified name (falling back to the plain name), kind,
    path, line span and signature.  With no symbols, a short notice replaces
    the table.  The result always ends with exactly one newline.
    """
    out = [_header(resp.query, resp.index.exists, resp.index.stale), ""]

    if resp.symbols:
        out.append("| name | kind | path | lines | signature |")
        out.append("|------|------|------|-------|-----------|")
        for sym in resp.symbols:
            label = sym.qualified or sym.name
            sig = sym.signature or ""
            out.append(
                f"| `{label}` | {sym.kind} | `{sym.path}` | "
                f"{sym.line_start}-{sym.line_end} | `{sig}` |"
            )
    else:
        out.append("_No symbol definitions found._")

    return "\n".join(out).rstrip() + "\n"
125
+
126
+
127
+ def _coverage_line(coverage) -> Optional[str]:
128
+ if coverage is not None and getattr(coverage, "partial", False):
129
+ return f"\n> ⚠️ Partial graph coverage: {coverage.reason}"
130
+ return None
131
+
132
+
133
+ # Audit-trail glyphs: an exact edge needs no annotation; inferred/ambiguous ones
134
+ # warn the reader that the link is a heuristic or could not be pinned down.
135
+ _CONF_MARK = {"extracted": "", "inferred": "~ inferred", "ambiguous": "? ambiguous"}
136
+
137
+
138
+ def _conf_mark(confidence: Optional[str]) -> str:
139
+ return _CONF_MARK.get(confidence or "extracted", confidence or "")
140
+
141
+
142
def render_refs(resp: RefsResponse) -> str:
    """Render reference sites as a Markdown table under a query/index header.

    Each row shows the site's kind, path, line and confidence annotation
    ("exact" when the annotation is empty).  With no sites, a short notice
    replaces the table.  A partial-coverage warning is appended in both cases
    when ``_coverage_line`` produces one.
    """
    out = [_header(resp.query, resp.index.exists, resp.index.stale), ""]
    coverage_note = _coverage_line(resp.coverage)

    if resp.sites:
        out.append("| kind | path | line | confidence |")
        out.append("|------|------|------|------------|")
        out.extend(
            f"| {site.kind} | `{site.path}` | {site.line} | {_conf_mark(site.confidence) or 'exact'} |"
            for site in resp.sites
        )
    else:
        out.append("_No references found._")

    if coverage_note:
        out.append(coverage_note)
    return "\n".join(out).rstrip() + "\n"
161
+
162
+
163
+ def _node_label(ref: dict) -> str:
164
+ name = ref.get("name")
165
+ path = ref.get("path") or ""
166
+ return f"`{name}` ({path})" if name and ref.get("kind") == "symbol" else f"`{path}`"
167
+
168
+
169
def render_path(payload: dict) -> str:
    """Render a path between two nodes as an arrow chain annotated with edge types.

    When ``payload["found"]`` is falsy, only the heading and the reason
    (default "No path found.") are returned.  Otherwise the nodes are listed
    one per line, separated by a line naming the edge type, its confidence
    annotation (if any) and an arrow giving the step's direction.
    """
    title = f"**path:** `{payload['src']}` → `{payload['dst']}`"
    if not payload.get("found"):
        why = payload.get("reason", "No path found.")
        return f"{title}\n\n_{why}_\n"

    out = [f"{title} · **{payload.get('hops', 0)} hop(s)**", ""]
    nodes = payload.get("nodes", [])
    steps = payload.get("steps", [])

    # Layout: first node, then an (edge line, node line) pair per step.
    if nodes:
        out.append(_node_label(nodes[0]))
    for hop, target in zip(steps, nodes[1:]):
        annotation = _conf_mark(hop.get("confidence"))
        edge_text = hop["edge_type"]
        if annotation:
            edge_text = f"{edge_text} {annotation}"
        glyph = "→" if hop.get("direction") == "out" else "←"
        out.append(f" {glyph} _{edge_text}_ {glyph}")
        out.append(_node_label(target))

    return "\n".join(out).rstrip() + "\n"
188
+
189
+
190
def render_describe(payload: dict, *, limit: int = 20) -> str:
    """Render a symbol node card: definition, centrality, callers, callees.

    Args:
        payload: Describe payload.  ``query`` is required.  ``found``,
            ``reason``, ``primary``, ``definitions``, ``callers`` and
            ``callees`` are optional; an absent or ``None`` value is treated as
            empty.
        limit: Maximum number of callers and of callees listed (default 20,
            the previously hard-coded cut-off).  Negative values are treated
            as 0.

    Returns:
        Markdown text ending with exactly one newline.  When a list is longer
        than ``limit`` a final bullet states how many entries were left out, so
        the count in the section heading is not mistaken for the number of
        entries shown.
    """
    head = f"**describe:** `{payload['query']}`"
    if not payload.get("found"):
        return f"{head}\n\n_{payload.get('reason', 'Not found.')}_\n"

    limit = max(limit, 0)
    # `or {}` / `or []` so a key that is present but None behaves like a missing key.
    p = payload.get("primary") or {}
    god = f" · god node #{p['god_rank']}" if p.get("god_rank") else ""
    lines = [
        f"{head} · module `{p.get('module', '?')}` · "
        f"in {p.get('in_degree', 0)} / out {p.get('out_degree', 0)}{god}",
        "",
    ]

    defs = payload.get("definitions") or []
    if defs:
        lines.append("**definition(s):**")
        for d in defs:
            sig = f" — `{d['signature']}`" if d.get("signature") else ""
            lines.append(
                f"- {d['kind']} `{d.get('qualified') or d['name']}` "
                f"at `{d['path']}:{d['line_start']}`{sig}"
            )
        lines.append("")

    callers = payload.get("callers") or []
    if callers:
        lines.append(f"**callers ({len(callers)}):**")
        shown = callers[:limit]
        for c in shown:
            mark = _conf_mark(c.get("confidence"))
            lines.append(f"- `{c['path']}:{c['line']}`{' · ' + mark if mark else ''}")
        if len(callers) > len(shown):
            lines.append(f"- _… {len(callers) - len(shown)} more not shown_")
        lines.append("")

    callees = payload.get("callees") or []
    if callees:
        lines.append(f"**callees ({len(callees)}):**")
        shown = callees[:limit]
        for c in shown:
            mark = _conf_mark(c.get("confidence"))
            lines.append(
                f"- {_node_label(c)} _{c.get('edge_type', '')}_"
                f"{' · ' + mark if mark else ''}"
            )
        if len(callees) > len(shown):
            lines.append(f"- _… {len(callees) - len(shown)} more not shown_")
        lines.append("")

    return "\n".join(lines).rstrip() + "\n"
231
+
232
+
233
def render_architecture(payload: dict) -> str:
    """Render the architecture overview: modules, god nodes, surprising links, questions.

    When ``payload["available"]`` is missing or falsy, only the italicised
    reason is returned.  Otherwise a summary line (index state plus node,
    edge, module and modularity figures) is followed by one section per
    non-empty list among ``communities``, ``god_nodes``, ``surprising`` and
    ``questions``.  The result ends with exactly one newline.
    """
    if not payload.get("available", False):
        why = payload.get("reason", "No architecture analysis available.")
        return f"_{why}_\n"

    index_info = payload.get("index", {})
    state = "STALE" if index_info.get("stale") else "fresh"
    out = [
        f"**Architecture overview** | **index:** {state} | "
        f"{payload.get('node_count', 0)} nodes · {payload.get('edge_count', 0)} edges · "
        f"{payload.get('community_count', 0)} modules · modularity {payload.get('modularity', 0)}",
        "",
    ]

    modules = payload.get("communities", [])
    if modules:
        out.extend(
            ["### Modules", "| # | module | size | key nodes |", "|---|--------|------|-----------|"]
        )
        for module in modules:
            # Only the first four key nodes of each module are shown.
            key_nodes = ", ".join(
                f"`{node['name']}`" for node in module.get("top_nodes", [])[:4]
            )
            out.append(
                f"| {module['id']} | {module['label']} | {module['size']} | {key_nodes} |"
            )
        out.append("")

    hubs = payload.get("god_nodes", [])
    if hubs:
        out.extend(
            [
                "### God nodes (most-connected)",
                "| node | kind | degree | location |",
                "|------|------|--------|----------|",
            ]
        )
        for hub in hubs:
            where = hub.get("path") or ""
            out.append(f"| `{hub['name']}` | {hub['kind']} | {hub['degree']} | `{where}` |")
        out.append("")

    bridges = payload.get("surprising", [])
    if bridges:
        out.append("### Surprising connections (cross-module bridges)")
        for bridge in bridges:
            left, right = bridge["from"], bridge["to"]
            out.append(
                f"- `{left['name']}` ({left.get('path') or '?'}) ↔ "
                f"`{right['name']}` ({right.get('path') or '?'}) — {bridge['edge_count']} edge(s)"
            )
        out.append("")

    prompts = payload.get("questions", [])
    if prompts:
        out.append("### Suggested questions")
        out.extend(f"- {prompt}" for prompt in prompts)
        out.append("")

    return "\n".join(out).rstrip() + "\n"
287
+
288
+
289
+ def _header(query: str, exists: bool, stale: bool) -> str:
290
+ freshness = "fresh" if not stale else "STALE"
291
+ if not exists:
292
+ freshness = "NO INDEX"
293
+ return f"**query:** {query} | **index:** {freshness}"
294
+
295
+
296
def render_impact(resp: ImpactResponse) -> str:
    """Render an impact analysis as a Markdown table of affected nodes.

    The heading gives the target, direction, depth and number of affected
    files.  Nodes are listed in order of distance, then path, then starting
    line (a falsy line sorts as 0).  With no nodes, a short notice replaces
    the table.  A partial-coverage warning is appended in both cases when
    ``_coverage_line`` produces one.
    """
    title = (
        f"**impact:** `{resp.target}` · **direction:** {resp.direction} · "
        f"**depth:** {resp.depth} · **affected files:** {len(resp.files)}"
    )
    out = [title, ""]
    coverage_note = _coverage_line(resp.coverage)

    if not resp.nodes:
        out.append("_No impact found (target unknown or no edges)._")
        if coverage_note:
            out.append(coverage_note)
        out.append("")
        return "\n".join(out).rstrip() + "\n"

    def _order(node):
        return (node.distance, node.path, node.line_start or 0)

    out.append("| dist | via | kind | node | location |")
    out.append("|------|-----|------|------|----------|")
    for node in sorted(resp.nodes, key=_order):
        # Append the line number only when one is known.
        where = f"{node.path}:{node.line_start}" if node.line_start else node.path
        shown_name = f"`{node.name}`" if node.name else "—"
        annotation = _conf_mark(node.via_confidence)
        via = f"{node.via_edge or ''} {annotation}".strip()
        out.append(f"| {node.distance} | {via} | {node.kind} | {shown_name} | `{where}` |")

    if coverage_note:
        out.append(coverage_note)
    return "\n".join(out).rstrip() + "\n"
@@ -0,0 +1,31 @@
1
+ """Conservative output-time secret redaction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
# A PEM-style private key block: a "-----BEGIN <label>-----" marker whose label
# ends in "PRIVATE KEY", everything up to the first "-----END <label>-----" with
# the same label (back-reference \1).  DOTALL lets the body span several lines.
_PRIVATE_KEY_RE = re.compile(
    r"-----BEGIN ([A-Z ]*PRIVATE KEY)-----.*?-----END \1-----",
    re.DOTALL,
)
# The literal prefix "AKIA" followed by exactly 16 upper-case letters or digits,
# delimited by word boundaries.
_AWS_ACCESS_KEY_RE = re.compile(r"\bAKIA[0-9A-Z]{16}\b")
# A secret-like key name (group 1), a ":" or "=" separator with optional
# surrounding whitespace (group 2), and a value of at least 16 characters from a
# token-like alphabet, optionally wrapped in matching quotes (group 3).
# NOTE(review): "_" is a word character, so the \b anchors do not match when
# the keyword is joined to other text by an underscore (e.g. DB_PASSWORD,
# SECRET_KEY).  Confirm this is intended.
_ASSIGNED_SECRET_RE = re.compile(
    r"(?i)\b(api[_-]?key|access[_-]?token|secret|token|password)\b"
    r"(\s*[:=]\s*)"
    r"([\"']?)[A-Za-z0-9_./+=:-]{16,}\3"
)
17
+
18
+
19
def redact_snippet(text: str) -> str:
    """Return ``text`` with recognised secrets replaced by ``<<redacted:...>>`` markers.

    Three passes run in order: private key blocks, AWS access key IDs, then
    ``name = value`` / ``name: value`` assignments to secret-like names.  For
    assignments the name and separator are kept and only the value (including
    any surrounding quotes) is replaced.
    """

    def _mask_assignment(m):
        return f"{m.group(1)}{m.group(2)}<<redacted:secret>>"

    without_keys = _PRIVATE_KEY_RE.sub(_redact_private_key, text)
    without_aws = _AWS_ACCESS_KEY_RE.sub("<<redacted:aws_access_key>>", without_keys)
    return _ASSIGNED_SECRET_RE.sub(_mask_assignment, without_aws)
25
+
26
+
27
+ def _redact_private_key(match: re.Match[str]) -> str:
28
+ return "\n".join(
29
+ "<<redacted:private_key>>" if line and not line.startswith("-----") else line
30
+ for line in match.group(0).splitlines()
31
+ )
@@ -0,0 +1,9 @@
1
+ """Parsers turn an eligible file into chunks + symbols.
2
+
3
+ base.py : Parser protocol -> parse(text) -> ParseResult (chunks, symbols, edges).
4
+ treesitter.py : AST-based symbol extraction using tree-sitter grammars.
5
+ line_chunker.py : fallback line-window chunking for unsupported / unparseable files.
6
+ languages.py : grammar registry + per-language node->symbol-kind maps + import/call queries.
7
+
8
+ Selection: treesitter when a grammar exists for the detected language, else line_chunker.
9
+ """
@@ -0,0 +1,47 @@
1
+ """Shared parser types."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional, Protocol
7
+
8
+
9
@dataclass
class Chunk:
    """A span of file text with its line range and a token estimate."""

    line_start: int  # first line of the span (presumably 1-based; TODO confirm)
    line_end: int  # last line of the span (presumably inclusive; TODO confirm)
    content: str  # text of the span
    token_est: int  # estimated token count (the estimator is not defined here)
    kind: str = "window"  # chunk category; defaults to "window"
    # presumably an index into ParseResult.symbols; None when unset (verify against producers)
    symbol_index: Optional[int] = None
17
+
18
+
19
@dataclass
class Symbol:
    """A named definition found in a file, with its kind and line range."""

    name: str  # symbol name
    kind: str  # symbol kind label (e.g. the "function"/"class" suffixes used by the language specs)
    line_start: int  # first line of the definition (presumably 1-based; TODO confirm)
    line_end: int  # last line of the definition (presumably inclusive; TODO confirm)
    qualified: Optional[str] = None  # qualified name, when one is available
    signature: Optional[str] = None  # signature text, when one is available
    # presumably the index of the enclosing symbol in the same symbol list; None when unset
    parent_index: Optional[int] = None
    docstring: Optional[str] = None  # docstring text, when one is available
29
+
30
+
31
@dataclass
class Edge:
    """A reference from this file to a named target, recorded at a line."""

    edge_type: str  # edge category (the concrete set of values is defined elsewhere)
    callee_name: str  # name of the referenced target
    line: int  # line where the reference occurs
    # presumably the index of the originating symbol in ParseResult.symbols; None when unset
    src_symbol_index: Optional[int] = None
37
+
38
+
39
@dataclass
class ParseResult:
    """Everything a parser produces for one file; every list defaults to empty."""

    # default_factory gives each instance its own fresh list.
    chunks: list[Chunk] = field(default_factory=list)
    symbols: list[Symbol] = field(default_factory=list)
    edges: list[Edge] = field(default_factory=list)
44
+
45
+
46
class Parser(Protocol):
    """Structural interface for parsers: any object with a compatible ``parse``."""

    # Takes the text to parse and returns a ParseResult.
    def parse(self, text: str) -> ParseResult: ...
@@ -0,0 +1,290 @@
1
+ """Per-language tree-sitter specs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Optional
7
+
8
# Symbol kinds treated as containers (presumably those that can enclose member
# symbols; verify against the consumers of this set).
CONTAINER_KINDS = {"class", "interface", "enum", "struct", "trait", "impl", "record"}


# Import/inheritance queries shared by the specs below.  Capture names used:
# @import.module (imported module), @extends.base (base class) and
# @implements.iface (implemented interface).

# Python: `from X import ...`, `import X`, and identifier base classes.
_PY_IMPORTS = """
(import_from_statement module_name: (dotted_name) @import.module)
(import_statement name: (dotted_name) @import.module)
(class_definition superclasses: (argument_list (identifier) @extends.base))
"""

# JavaScript: import source strings and identifier class heritage.
_JS_IMPORTS = """
(import_statement source: (string (string_fragment) @import.module))
(class_declaration (class_heritage (identifier) @extends.base))
"""

# TypeScript: import source strings, plus `extends` and `implements` clauses.
_TS_IMPORTS = """
(import_statement source: (string (string_fragment) @import.module))
(class_declaration (class_heritage
  (extends_clause value: (identifier) @extends.base)))
(class_declaration (class_heritage
  (implements_clause (type_identifier) @implements.iface)))
"""
29
+
30
+
31
@dataclass(frozen=True)
class LangSpec:
    """Immutable per-language bundle of tree-sitter query strings."""

    name: str  # language name; used as the key in LANGS
    ts_name: str  # presumably the tree-sitter grammar name; equals `name` in every spec in this file
    defs_query: str  # query capturing definitions as @name plus a @def.<kind> capture
    calls_query: str  # query capturing called names as @callee
    # query capturing @import.module / @extends.base / @implements.iface; empty by default
    imports_query: str = ""
38
+
39
+
40
# Python: function and class definitions; calls through a bare identifier or
# through an attribute access (the attribute name is captured).
_PYTHON = LangSpec(
    name="python",
    ts_name="python",
    defs_query="""
    (function_definition name: (identifier) @name) @def.function
    (class_definition name: (identifier) @name) @def.class
    """,
    calls_query="""
    (call function: (identifier) @callee)
    (call function: (attribute attribute: (identifier) @callee))
    """,
    imports_query=_PY_IMPORTS,
)
53
+
54
# JavaScript definitions: function/class declarations, methods, and variables
# initialised with an arrow function or a function expression.
_JS_DEFS = """
(function_declaration name: (identifier) @name) @def.function
(class_declaration name: (identifier) @name) @def.class
(method_definition name: (property_identifier) @name) @def.method
(variable_declarator name: (identifier) @name value: (arrow_function)) @def.function
(variable_declarator name: (identifier) @name value: (function_expression)) @def.function
"""
# Calls through a bare identifier or a member expression (the property name is
# captured).  Also used by the TypeScript spec.
_JS_CALLS = """
(call_expression function: (identifier) @callee)
(call_expression function: (member_expression property: (property_identifier) @callee))
"""

_JAVASCRIPT = LangSpec(
    name="javascript",
    ts_name="javascript",
    defs_query=_JS_DEFS,
    calls_query=_JS_CALLS,
    imports_query=_JS_IMPORTS,
)
73
+
74
# TypeScript definitions: functions, classes, methods, arrow-function variables,
# interfaces, enums and type aliases.
# NOTE(review): unlike _JS_DEFS there is no pattern for variables initialised
# with a function_expression.  Confirm whether that is intentional.
_TS_DEFS = """
(function_declaration name: (identifier) @name) @def.function
(class_declaration name: (type_identifier) @name) @def.class
(method_definition name: (property_identifier) @name) @def.method
(variable_declarator name: (identifier) @name value: (arrow_function)) @def.function
(interface_declaration name: (type_identifier) @name) @def.interface
(enum_declaration name: (identifier) @name) @def.enum
(type_alias_declaration name: (type_identifier) @name) @def.type
"""

# Reuses the JavaScript call query.
_TYPESCRIPT = LangSpec(
    name="typescript",
    ts_name="typescript",
    defs_query=_TS_DEFS,
    calls_query=_JS_CALLS,
    imports_query=_TS_IMPORTS,
)
91
+
92
# --- Tier A: compiled / back-end languages ------------------------------------------------------
# NOTE: symbol extraction is driven by treesitter._definition_kind (node-type mapping), not by
# defs_query. These queries are kept as compile-checked documentation of the relevant node types
# and to power graph edges (imports_query). See tests/test_languages.py.

# Java: classes, interfaces, enums, records, methods and constructors; method
# invocations by name; imports, superclass and implemented interfaces.
_JAVA = LangSpec(
    name="java",
    ts_name="java",
    defs_query="""
    (class_declaration name: (identifier) @name) @def.class
    (interface_declaration name: (identifier) @name) @def.interface
    (enum_declaration name: (identifier) @name) @def.enum
    (record_declaration name: (identifier) @name) @def.record
    (method_declaration name: (identifier) @name) @def.method
    (constructor_declaration name: (identifier) @name) @def.method
    """,
    calls_query="(method_invocation name: (identifier) @callee)",
    imports_query="""
    (import_declaration (scoped_identifier) @import.module)
    (superclass (type_identifier) @extends.base)
    (super_interfaces (type_list (type_identifier) @implements.iface))
    """,
)
115
+
116
# Go: functions, methods and type specs; import paths given as interpreted
# string literals.  The call pattern only matches calls whose function child is
# a plain identifier.
_GO = LangSpec(
    name="go",
    ts_name="go",
    defs_query="""
    (function_declaration name: (identifier) @name) @def.function
    (method_declaration name: (field_identifier) @name) @def.method
    (type_spec name: (type_identifier) @name) @def.type
    """,
    calls_query="(call_expression function: (identifier) @callee)",
    imports_query="(import_spec (interpreted_string_literal) @import.module)",
)
127
+
128
# Rust: functions, structs, enums, traits, impl blocks (named by their type) and
# modules; `use` declarations in four shapes.  The call pattern only matches
# calls whose function child is a plain identifier.
_RUST = LangSpec(
    name="rust",
    ts_name="rust",
    defs_query="""
    (function_item name: (identifier) @name) @def.function
    (struct_item name: (type_identifier) @name) @def.struct
    (enum_item name: (type_identifier) @name) @def.enum
    (trait_item name: (type_identifier) @name) @def.trait
    (impl_item type: (type_identifier) @name) @def.impl
    (mod_item name: (identifier) @name) @def.module
    """,
    calls_query="(call_expression function: (identifier) @callee)",
    imports_query="""
    (use_declaration (scoped_identifier) @import.module)
    (use_declaration (identifier) @import.module)
    (use_declaration (use_as_clause) @import.module)
    (use_declaration (scoped_use_list) @import.module)
    """,
)
147
+
148
# C: function definitions with a plain identifier declarator and named structs;
# both #include forms (<...> and "...").  The call pattern only matches calls
# whose function child is a plain identifier.
_C = LangSpec(
    name="c",
    ts_name="c",
    defs_query="""
    (function_definition
      declarator: (function_declarator declarator: (identifier) @name)) @def.function
    (struct_specifier name: (type_identifier) @name) @def.struct
    """,
    calls_query="(call_expression function: (identifier) @callee)",
    imports_query="""
    (preproc_include path: (system_lib_string) @import.module)
    (preproc_include path: (string_literal) @import.module)
    """,
)
162
+
163
# C++: the C patterns plus classes and namespaces (recorded as modules);
# #include forms and base-class clauses.  The call pattern only matches calls
# whose function child is a plain identifier.
_CPP = LangSpec(
    name="cpp",
    ts_name="cpp",
    defs_query="""
    (function_definition
      declarator: (function_declarator declarator: (identifier) @name)) @def.function
    (class_specifier name: (type_identifier) @name) @def.class
    (struct_specifier name: (type_identifier) @name) @def.struct
    (namespace_definition name: (namespace_identifier) @name) @def.module
    """,
    calls_query="(call_expression function: (identifier) @callee)",
    imports_query="""
    (preproc_include path: (system_lib_string) @import.module)
    (preproc_include path: (string_literal) @import.module)
    (base_class_clause (type_identifier) @extends.base)
    """,
)
180
+
181
# C#: classes, interfaces, structs, enums, methods and constructors; `using`
# directives; every identifier in a base list is captured as @extends.base.
# The call pattern only matches invocations whose function child is a plain
# identifier.
_CSHARP = LangSpec(
    name="csharp",
    ts_name="csharp",
    defs_query="""
    (class_declaration name: (identifier) @name) @def.class
    (interface_declaration name: (identifier) @name) @def.interface
    (struct_declaration name: (identifier) @name) @def.struct
    (enum_declaration name: (identifier) @name) @def.enum
    (method_declaration name: (identifier) @name) @def.method
    (constructor_declaration name: (identifier) @name) @def.method
    """,
    calls_query="(invocation_expression function: (identifier) @callee)",
    imports_query="""
    (using_directive (identifier) @import.module)
    (using_directive (qualified_name) @import.module)
    (base_list (identifier) @extends.base)
    """,
)
199
+
200
# Ruby: classes, modules and methods; calls by method name.  imports_query has
# only a superclass pattern; there is no @import.module capture.
_RUBY = LangSpec(
    name="ruby",
    ts_name="ruby",
    defs_query="""
    (class name: (constant) @name) @def.class
    (module name: (constant) @name) @def.module
    (method name: (identifier) @name) @def.method
    """,
    calls_query="(call method: (identifier) @callee)",
    imports_query="(superclass (constant) @extends.base)",
)
211
+
212
# PHP: classes, interfaces, traits, methods and functions; plain function calls
# by name; namespace `use` clauses, base clauses and implemented interfaces.
_PHP = LangSpec(
    name="php",
    ts_name="php",
    defs_query="""
    (class_declaration name: (name) @name) @def.class
    (interface_declaration name: (name) @name) @def.interface
    (trait_declaration name: (name) @name) @def.trait
    (method_declaration name: (name) @name) @def.method
    (function_definition name: (name) @name) @def.function
    """,
    calls_query="(function_call_expression function: (name) @callee)",
    imports_query="""
    (namespace_use_declaration (namespace_use_clause (qualified_name) @import.module))
    (base_clause (name) @extends.base)
    (class_interface_clause (name) @implements.iface)
    """,
)
229
+
230
# Kotlin: classes, objects (recorded as classes) and functions; calls through a
# simple identifier; import headers.  These patterns match children by node
# type, not by field name.
_KOTLIN = LangSpec(
    name="kotlin",
    ts_name="kotlin",
    defs_query="""
    (class_declaration (type_identifier) @name) @def.class
    (object_declaration (type_identifier) @name) @def.class
    (function_declaration (simple_identifier) @name) @def.function
    """,
    calls_query="(call_expression (simple_identifier) @callee)",
    imports_query="(import_header (identifier) @import.module)",
)
241
+
242
# Registry of every hand-written spec, keyed by LangSpec.name.
LANGS: dict[str, LangSpec] = {
    spec.name: spec
    for spec in (
        _PYTHON,
        _JAVASCRIPT,
        _TYPESCRIPT,
        _JAVA,
        _GO,
        _RUST,
        _C,
        _CPP,
        _CSHARP,
        _RUBY,
        _PHP,
        _KOTLIN,
    )
}
259
+
260
+
261
def has_grammar(lang: Optional[str]) -> bool:
    """True if a tree-sitter grammar is loadable for `lang` (Tier B eligibility).

    Returns False for a falsy `lang`, and whenever importing
    ``tree_sitter_language_pack`` or looking the language up raises.
    """
    if lang:
        try:
            # Imported lazily: a failing import just means "no grammar".
            from tree_sitter_language_pack import get_language

            return get_language(lang) is not None
        except Exception:
            pass
    return False
271
+
272
+
273
def is_supported(lang: Optional[str]) -> bool:
    """A language is supported if it has a Tier-A spec OR a loadable Tier-B grammar.

    The registry is checked first; the grammar lookup only runs when `lang`
    has no spec.
    """
    return lang in LANGS or has_grammar(lang)
278
+
279
+
280
def spec_for(lang: Optional[str]) -> Optional[LangSpec]:
    """Return the registered ``LangSpec`` for `lang`, or None.

    None is returned for a falsy `lang` and for a name with no entry in LANGS.
    """
    if not lang:
        return None
    return LANGS.get(lang)
282
+
283
+
284
def has_full_graph(lang: Optional[str]) -> bool:
    """True if `lang` has a Tier-A spec (full import/inheritance edges for refs/impact).

    Tier-B languages (a loadable grammar but no hand-tuned spec) yield symbols and
    best-effort call sites only, so their dependency graph is partial.
    """
    spec = spec_for(lang)
    return spec is not None