cerebro-code-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cerebro/__init__.py +3 -0
- cerebro/callgraph.py +38 -0
- cerebro/cli.py +348 -0
- cerebro/config.py +136 -0
- cerebro/db.py +245 -0
- cerebro/docaudit.py +174 -0
- cerebro/embeddings.py +175 -0
- cerebro/gitsync.py +124 -0
- cerebro/graph.py +77 -0
- cerebro/indexer.py +854 -0
- cerebro/insights.py +217 -0
- cerebro/notes.py +70 -0
- cerebro/server.py +382 -0
- cerebro/summaries.py +66 -0
- cerebro/summarizer.py +109 -0
- cerebro/tsconfig.py +159 -0
- cerebro/views.py +52 -0
- cerebro/viz.py +374 -0
- cerebro_code_memory-0.1.0.dist-info/METADATA +160 -0
- cerebro_code_memory-0.1.0.dist-info/RECORD +23 -0
- cerebro_code_memory-0.1.0.dist-info/WHEEL +4 -0
- cerebro_code_memory-0.1.0.dist-info/entry_points.txt +11 -0
- cerebro_code_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
cerebro/indexer.py
ADDED
|
@@ -0,0 +1,854 @@
|
|
|
1
|
+
"""Static indexer: hash files, extract symbols + imports, resolve dependency edges.
|
|
2
|
+
|
|
3
|
+
No LLM is involved here — this layer is deterministic, fast, and free. It is the
|
|
4
|
+
structural map (plan layers 1 and 5). Summaries (layer 2) are written separately
|
|
5
|
+
by the chat sessions via summaries.record().
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
import posixpath
|
|
12
|
+
import re
|
|
13
|
+
import threading
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from . import config as cfg
|
|
18
|
+
from . import db
|
|
19
|
+
from . import tsconfig
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
from tree_sitter_language_pack import get_parser
|
|
23
|
+
except Exception: # pragma: no cover - import guard for environments w/o the pack
|
|
24
|
+
get_parser = None
|
|
25
|
+
|
|
26
|
+
# tree-sitter's Parser objects (Rust/pyo3 binding) are unsendable across threads —
|
|
27
|
+
# sharing one cache between threads panics. FastMCP runs sync tools in a worker
|
|
28
|
+
# thread pool, so the cache must be thread-local.
|
|
29
|
+
_PARSER_TLS = threading.local()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _parser(lang: str):
    """Return this thread's cached tree-sitter parser for ``lang``, or None.

    None is returned when ``get_parser`` is unavailable (the import guard set
    it to None) or when ``get_parser(lang)`` raised. A failed lookup is cached
    as None, so it is not retried on the same thread.
    """
    if get_parser is None:
        return None
    parsers = getattr(_PARSER_TLS, "parsers", None)
    if parsers is None:
        # First use on this thread: start an empty per-thread cache.
        parsers = {}
        _PARSER_TLS.parsers = parsers
    if lang in parsers:
        return parsers[lang]
    try:
        loaded = get_parser(lang)
    except Exception:
        loaded = None
    parsers[lang] = loaded
    return loaded
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with second precision."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat(timespec="seconds")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def file_hash(path: Path) -> str:
    """Return the hex SHA-1 digest of the file at ``path``.

    The file is read in 64 KiB chunks, so it is never loaded whole. Errors
    raised while opening or reading the file propagate to the caller.
    """
    digest = hashlib.sha1()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# --- disk diff ---------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
def disk_state(config: cfg.Config) -> dict[str, str]:
    """Hash every file yielded by ``config.iter_files()``.

    Returns a ``{relative path: hex digest}`` dict. A file whose hashing raises
    OSError is left out of the result instead of aborting the scan.
    """
    hashes: dict[str, str] = {}
    for rel_path, full_path in config.iter_files():
        try:
            digest = file_hash(full_path)
        except OSError:
            continue
        hashes[rel_path] = digest
    return hashes
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def diff(conn, disk: dict[str, str]) -> dict[str, list[str]]:
    """Compare on-disk hashes against the hashes stored in the database.

    Returns a dict with three sorted path lists: ``new`` (on disk only),
    ``changed`` (in both, with different hashes) and ``deleted`` (stored only).
    """
    stored = db.stored_hashes(conn)
    added, modified = [], []
    for path, digest in disk.items():
        if path not in stored:
            added.append(path)
        elif digest != stored[path]:
            modified.append(path)
    removed = [path for path in stored if path not in disk]
    return {
        "new": sorted(added),
        "changed": sorted(modified),
        "deleted": sorted(removed),
    }
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# --- tree-sitter node accessors ----------------------------------------------
|
|
81
|
+
# tree-sitter-language-pack ships a binding whose Node members are *methods*
|
|
82
|
+
# (node.kind(), node.start_byte()) rather than the properties of the standard
|
|
83
|
+
# py-tree-sitter package (node.type, node.start_byte). These helpers normalize
|
|
84
|
+
# both so the extraction logic stays clean and version-resilient.
|
|
85
|
+
|
|
86
|
+
def _attr(node, *names):
|
|
87
|
+
for name in names:
|
|
88
|
+
if hasattr(node, name):
|
|
89
|
+
v = getattr(node, name)
|
|
90
|
+
return v() if callable(v) else v
|
|
91
|
+
raise AttributeError(f"node has none of {names}")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _kind(n) -> str:
    """Return the node's grammar kind, read from ``kind`` or else ``type``."""
    node_kind = _attr(n, "kind", "type")
    return node_kind
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _child_count(n) -> int:
    """Return the number of children of node ``n`` (its ``child_count``)."""
    total = _attr(n, "child_count")
    return total
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _children(n):
    """Return the children of ``n`` as a list, in index order."""
    total = _child_count(n)
    kids = []
    for index in range(total):
        kids.append(n.child(index))
    return kids
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _field(n, name):
|
|
107
|
+
return n.child_by_field_name(name)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _line(n) -> int:
    """Return the 1-based source line on which node ``n`` starts.

    The start position is read from ``start_position`` or else ``start_point``.
    The row comes from the position's ``row`` attribute when it has one,
    otherwise from index 0 of a tuple-like position. If neither is available
    the row defaults to 0, giving line 1.
    """
    pos = _attr(n, "start_position", "start_point")
    row = getattr(pos, "row", None)
    if row is None:
        # A position exposed as a plain (row, column) tuple has no ``row``
        # attribute; without this fallback every node would report line 1.
        try:
            row = pos[0]
        except (TypeError, IndexError, KeyError):
            row = 0
    return row + 1
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _text(src: bytes, n) -> str:
    """Return the source text covered by node ``n``.

    ``src`` must be the UTF-8 *bytes* of the file: tree-sitter reports byte
    offsets, and slicing a str instead would shift every offset that follows a
    multi-byte character (em-dashes, emoji). Undecodable bytes are dropped.
    """
    begin = _attr(n, "start_byte")
    finish = _attr(n, "end_byte")
    return src[begin:finish].decode("utf-8", "ignore")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _root(tree):
    """Return the root node of a parsed ``tree`` (its ``root_node``)."""
    top = _attr(tree, "root_node")
    return top
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _do_parse(parser, text: str):
|
|
126
|
+
try:
|
|
127
|
+
return parser.parse(text) # language-pack binding wants str
|
|
128
|
+
except TypeError:
|
|
129
|
+
return parser.parse(text.encode("utf-8")) # standard binding wants bytes
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _walk(node):
    """Yield ``node`` and then every descendant, depth-first.

    An explicit stack replaces recursion. Children are pushed in index order
    and popped from the end, so siblings are visited last-to-first.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        yield current
        pending += _children(current)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# --- symbol / import extraction ----------------------------------------------
|
|
141
|
+
|
|
142
|
+
_DEF_TYPES = {
|
|
143
|
+
"python": {"function_definition": "function", "class_definition": "class"},
|
|
144
|
+
"javascript": {
|
|
145
|
+
"function_declaration": "function",
|
|
146
|
+
"generator_function_declaration": "function",
|
|
147
|
+
"class_declaration": "class",
|
|
148
|
+
"method_definition": "method",
|
|
149
|
+
},
|
|
150
|
+
}
|
|
151
|
+
_ARROW_VALUES = ("arrow_function", "function", "function_expression")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _base_lang(lang: str) -> str:
|
|
155
|
+
"""Collapse a tree-sitter language name to the extractor family that handles
|
|
156
|
+
it. typescript/tsx/javascript all share the JS extractor; dart and python
|
|
157
|
+
each have their own."""
|
|
158
|
+
if lang == "python":
|
|
159
|
+
return "python"
|
|
160
|
+
if lang == "dart":
|
|
161
|
+
return "dart"
|
|
162
|
+
return "javascript"
|
|
163
|
+
# Identifier node kinds we count as a *use* of a name (a reference). Definition
|
|
164
|
+
# sites are excluded separately, so what remains is genuine usage: calls, JSX
|
|
165
|
+
# tags, type annotations, value reads, object shorthand.
|
|
166
|
+
_IDENT_KINDS = {
|
|
167
|
+
"identifier", "property_identifier", "type_identifier", "shorthand_property_identifier",
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _signature(src: bytes | str, node) -> str:
    """Return the first line of ``node``'s source text, stripped, max 160 chars.

    ``src`` is normally the file's UTF-8 bytes, because the node's offsets are
    byte offsets. A ``str`` is also accepted and encoded first; the previous
    ``str`` annotation promised this, but the byte slice plus ``.decode`` in
    ``_text`` would have failed on it. Returns "" when the node spans no text.
    """
    if isinstance(src, str):
        src = src.encode("utf-8")
    text = _text(src, node)
    if not text:
        return ""
    # A non-empty string always yields at least one line from splitlines().
    return text.splitlines()[0].strip()[:160]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def extract(lang: str, src):
    """Return (symbols, imports, calls, refs) for one source file.

    symbols: (kind, name, line, signature). imports: language-specific descriptors.
    calls: (enclosing_symbol|None, callee_name, line). refs: distinct names USED in
    the file (references), with the file's own definition names excluded.

    ``src`` may be ``bytes`` or ``str``. Four empty lists are returned when no
    parser is available for ``lang``. Dart is delegated to ``_dart_extract``;
    python and the javascript family share the walk below.
    """
    if isinstance(src, bytes):
        src_str = src.decode("utf-8", "ignore")
        # Re-encode the *decoded* text. "ignore" drops invalid bytes, and the
        # parser only sees the cleaned text, so slicing the raw input would
        # misalign every byte offset after the first dropped byte. For valid
        # UTF-8 this round-trip reproduces the input exactly.
        src_bytes = src_str.encode("utf-8")
    else:
        src_str, src_bytes = src, src.encode("utf-8")
    parser = _parser(lang)
    if parser is None:
        return [], [], [], []
    root = _root(_do_parse(parser, src_str))
    base = _base_lang(lang)
    if base == "dart":
        return _dart_extract(root, src_bytes)
    def_types = _DEF_TYPES[base]
    call_kinds = {"call"} if base == "python" else {"call_expression", "new_expression"}

    symbols, calls = [], []
    refs: set[str] = set()
    def_spans: set[int] = set()  # byte offsets of definition-name nodes (not uses)
    stack = [(root, None)]  # (node, enclosing definition name)
    while stack:
        n, enc = stack.pop()
        k = _kind(n)
        new_enc = enc
        kind = def_types.get(k)
        if kind:
            name_node = _field(n, "name")
            if name_node is not None:
                nm = _text(src_bytes, name_node)
                symbols.append((kind, nm, _line(n), _signature(src_bytes, n)))
                def_spans.add(_attr(name_node, "start_byte"))
                new_enc = nm
        # const foo = () => {...} / const Bar = function(){}
        elif k == "variable_declarator":
            value = _field(n, "value")
            name_node = _field(n, "name")
            if name_node is not None:
                def_spans.add(_attr(name_node, "start_byte"))  # a binding, not a use
                if value is not None and _kind(value) in _ARROW_VALUES:
                    nm = _text(src_bytes, name_node)
                    symbols.append(("function", nm, _line(n), _signature(src_bytes, n)))
                    new_enc = nm
        elif k in call_kinds:
            cn = _callee_name(src_bytes, n)
            if cn:
                # Attributed to the definition enclosing the call node itself.
                calls.append((enc, cn, _line(n)))
        # A definition's name node is a child of the definition, so its offset
        # is already in def_spans by the time that child is popped here.
        if k in _IDENT_KINDS and _attr(n, "start_byte") not in def_spans:
            refs.add(_text(src_bytes, n))
        for c in _children(n):
            stack.append((c, new_enc))

    imports = _py_imports(root, src_bytes) if base == "python" else _js_imports(root, src_bytes)
    return symbols, imports, calls, sorted(refs)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _callee_name(src, call):
    """Best-effort callee name of a call / new expression (the rightmost name).

    Returns None when the node has no ``function``/``constructor`` field or the
    callee is not a plain name, a python ``attribute`` or a js
    ``member_expression``.
    """
    target = _field(call, "function") or _field(call, "constructor")
    if target is None:
        return None
    target_kind = _kind(target)
    if target_kind in ("identifier", "property_identifier"):
        return _text(src, target)
    # python a.b.c -> field 'attribute'; js a.b.c -> field 'property'
    name_field = {"attribute": "attribute", "member_expression": "property"}.get(target_kind)
    if name_field is None:
        return None
    leaf = _field(target, name_field)
    return None if leaf is None else _text(src, leaf)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _aliased_name(src, node):
    """Return the text of the imported name in an aliased import node, or None.

    The ``name`` field is used when present; otherwise the first ``dotted_name``
    child is used.
    """
    found = _field(node, "name")
    if not found:
        for child in _children(node):
            if _kind(child) == "dotted_name":
                found = child
                break
        else:
            found = None
    return _text(src, found) if found is not None else None
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _py_imports(root, src):
    """Return list of (level, module): level 0 == absolute, level N == N leading dots.

    For `from <module> import <names>` each imported name is also emitted as a
    candidate submodule (joined to the module). That is what lets
    `from . import config` resolve to config.py: tree-sitter keeps the dots in
    module_name and the name `config` in a separate child.
    """
    found = []
    for stmt in _walk(root):
        stmt_kind = _kind(stmt)
        if stmt_kind == "import_statement":
            for part in _children(stmt):
                part_kind = _kind(part)
                if part_kind == "dotted_name":
                    found.append((0, _text(src, part)))
                elif part_kind == "aliased_import":
                    alias_target = _aliased_name(src, part)
                    if alias_target:
                        found.append((0, alias_target))
            continue
        if stmt_kind != "import_from_statement":
            continue
        module_node = _field(stmt, "module_name")
        if module_node is None:
            continue
        raw = _text(src, module_node)
        dots = len(raw) - len(raw.lstrip("."))
        module = raw[dots:]
        found.append((dots, module))
        # The module node is itself a child of the statement; skip it by span
        # so it is not emitted a second time as an imported name.
        module_span = (_attr(module_node, "start_byte"), _attr(module_node, "end_byte"))
        for part in _children(stmt):
            if (_attr(part, "start_byte"), _attr(part, "end_byte")) == module_span:
                continue
            part_kind = _kind(part)
            if part_kind == "dotted_name":
                name = _text(src, part)
            elif part_kind == "aliased_import":
                name = _aliased_name(src, part)
            else:
                name = None
            if name:
                found.append((dots, f"{module}.{name}" if module else name))
    return found
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _is_type_only(stmt) -> bool:
    """Tell whether a TS import/export statement carries no runtime value.

    True for `import type {...}` (a `type` keyword directly under the
    statement) and for a named clause in which every specifier is itself
    `type`-qualified (`import { type A, type B }`). A default or namespace
    binding, a side-effect import, or any unqualified specifier makes it a
    real runtime edge, so the result is False.
    """
    parts = _children(stmt)
    for part in parts:
        if _kind(part) == "type":  # `import type { ... } from ...`
            return True
    specifiers = []
    for part in parts:
        if _kind(part) != "import_clause":
            continue
        for binding in _children(part):
            binding_kind = _kind(binding)
            if binding_kind in ("identifier", "namespace_import"):
                return False  # default / `* as ns` binding is a runtime value
            if binding_kind == "named_imports":
                specifiers += [s for s in _children(binding) if _kind(s) == "import_specifier"]
    if not specifiers:
        return False  # side-effect import, or a re-export we can't prove type-only
    for spec in specifiers:
        if not any(_kind(g) == "type" for g in _children(spec)):
            return False
    return True
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _js_imports(root, src):
    """Return list of (source, is_type) for every import found under ``root``.

    ``source`` is the specifier with its quotes stripped, like './foo' or
    'react'. ``is_type`` is True for type-only TS imports (no runtime edge).
    Static import/export statements, CommonJS `require(...)` and dynamic
    `import(...)` (e.g. the `dynamic(() => import('./X'))` lazy-load pattern)
    are all captured; the call forms are always runtime edges.
    """
    quotes = "\"'`"
    found = []
    for node in _walk(root):
        node_kind = _kind(node)
        if node_kind in ("import_statement", "export_statement"):
            source = _field(node, "source")
            if source is not None:
                found.append((_text(src, source).strip(quotes), _is_type_only(node)))
            continue
        if node_kind != "call_expression":
            continue
        callee = _field(node, "function")
        if callee is None:
            continue
        # require('x') or dynamic import('x') (function node is the `import` kw)
        if not (_text(src, callee) == "require" or _kind(callee) == "import"):
            continue
        args = _field(node, "arguments")
        if args is None:
            continue
        found.extend(
            (_text(src, arg).strip(quotes), False)
            for arg in _children(args)
            if _kind(arg) == "string"
        )
    return found
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# --- Dart / Flutter extraction -----------------------------------------------
|
|
353
|
+
# tree-sitter-dart shape (verified against the language pack grammar):
|
|
354
|
+
# top-level function -> `function_signature` (name field); body is a sibling
|
|
355
|
+
# method in a class -> `method_signature` wrapping a `function_signature`
|
|
356
|
+
# class / enum -> `class_definition` / `enum_declaration` (name field)
|
|
357
|
+
# mixin -> `mixin_declaration` (no name field; plain identifier)
|
|
358
|
+
# extension -> `extension_declaration` (may be anonymous -> no symbol)
|
|
359
|
+
# call `Foo(...)` -> an `identifier`/`type_identifier` immediately followed
|
|
360
|
+
# by a `selector` whose first child is `argument_part`
|
|
361
|
+
_DART_CONTAINER_DEFS = {
|
|
362
|
+
"class_definition": "class",
|
|
363
|
+
"mixin_declaration": "class",
|
|
364
|
+
"enum_declaration": "enum",
|
|
365
|
+
"extension_declaration": "extension",
|
|
366
|
+
}
|
|
367
|
+
_DART_IDENT_KINDS = {"identifier", "type_identifier"}
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _dart_name_node(node):
    """Return the name node of a Dart container declaration, or None.

    The `name` field is used when present (class/enum/named extension);
    otherwise the first plain `identifier` child is used (mixin). An anonymous
    extension has neither, so None is returned and no symbol is emitted. The
    `type_identifier` of the extended type is never returned.
    """
    named = _field(node, "name")
    if named is not None:
        return named
    for child in _children(node):
        if _kind(child) == "identifier":
            return child
    return None
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _dart_imports(root, src):
    """Return the raw URI string of every import/export/part directive.

    Examples: 'package:app/x.dart', '../models/user.dart', 'user.g.dart'.
    Exports re-expose a file's API and `part` pulls a file into the library,
    so both count as real edges. `part of` is the reverse pointer
    (part -> library); it is skipped to avoid a cycle against the `part` edge
    the library already declares.
    """
    directive_kinds = ("library_import", "library_export", "part_directive")
    uris = []
    for node in _walk(root):
        if _kind(node) not in directive_kinds:
            continue
        # Only the first `uri` node met while walking the directive is used.
        for inner in _walk(node):
            if _kind(inner) == "uri":
                uris.append(_text(src, inner).strip().strip("\"'"))
                break
    return uris
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
# Signatures that sit inside a class-body member wrapper. A member with a body
|
|
397
|
+
# (or abstract) is a `method_signature`; a field or body-less constructor is a
|
|
398
|
+
# `declaration`. Either can wrap one of these inner signatures.
|
|
399
|
+
_DART_SIG_KINDS = {
|
|
400
|
+
"function_signature": "method",
|
|
401
|
+
"constructor_signature": "constructor",
|
|
402
|
+
"constant_constructor_signature": "constructor",
|
|
403
|
+
"factory_constructor_signature": "constructor",
|
|
404
|
+
"getter_signature": "method",
|
|
405
|
+
"setter_signature": "method",
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _dart_member_symbol(node, src):
    """Resolve a class-body member to a (kind, name, name_nodes) triple.

    ``node`` is a `method_signature` or `declaration`. The name is the *simple*
    call-site name, i.e. the last plain identifier of the signature:
    `Product.named` -> 'named', the default ctor -> the class name, a getter ->
    its property. Simple names are what keep cerebro_callers and dead_symbols
    working, since the refs and calls tables store unqualified names.
    Returns (None, None, ()) for fields and other declarations that define no
    callable symbol.
    """
    nothing = (None, None, ())
    signature = None
    for child in _children(node):
        if _kind(child) in _DART_SIG_KINDS:
            signature = child
            break
    if signature is None:
        return nothing
    sig_kind = _kind(signature)
    if sig_kind == "function_signature":
        name_node = _field(signature, "name")
        if name_node is None:
            return nothing
        return "method", _text(src, name_node), (name_node,)
    # ctor / getter / setter: name is the signature's last direct identifier
    # (`Product.named` -> [Product, named] -> 'named'; getter -> [total]).
    idents = [c for c in _children(signature) if _kind(c) == "identifier"]
    if not idents:
        return nothing
    return _DART_SIG_KINDS[sig_kind], _text(src, idents[-1]), tuple(idents)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _dart_extract(root, src):
    """Extract (symbols, imports, calls, refs) from a parsed Dart file.

    Dart's grammar differs enough from python/js (methods wrap a nested
    function_signature; calls are identifier+selector, not call_expression) to
    warrant its own walk. The return shape matches ``extract``.

    ``root`` is the tree's root node and ``src`` the UTF-8 bytes that the
    node offsets index into. Each stack entry carries the node, its parent's
    kind and the name of the enclosing definition used to attribute calls.
    """
    symbols, calls = [], []
    refs: set[str] = set()
    def_spans: set[int] = set()  # byte offsets of definition-name nodes (not uses)
    stack = [(root, None, None)]  # (node, parent_kind, enclosing definition name)
    while stack:
        n, parent_kind, enc = stack.pop()
        k = _kind(n)
        new_enc = enc
        # Members (methods, constructors, getters, setters) live in a
        # method_signature (has a body / abstract) or a declaration (field or
        # body-less ctor); _dart_member_symbol sorts out which and skips fields.
        if k in ("method_signature", "declaration"):
            mkind, mname, mnodes = _dart_member_symbol(n, src)
            if mkind:
                symbols.append((mkind, mname, _line(n), _signature(src, n)))
                for nn in mnodes:
                    def_spans.add(_attr(nn, "start_byte"))
                new_enc = mname
        elif k == "function_signature" and parent_kind not in ("method_signature", "declaration"):
            name_node = _field(n, "name")  # top-level fn; member sigs handled above
            if name_node is not None:
                nm = _text(src, name_node)
                symbols.append(("function", nm, _line(n), _signature(src, n)))
                def_spans.add(_attr(name_node, "start_byte"))
                new_enc = nm
        elif k in _DART_CONTAINER_DEFS:
            name_node = _dart_name_node(n)
            if name_node is not None:
                nm = _text(src, name_node)
                symbols.append((_DART_CONTAINER_DEFS[k], nm, _line(n), _signature(src, n)))
                def_spans.add(_attr(name_node, "start_byte"))
                new_enc = nm
        elif k == "type_alias":  # typedef Json = ...; -> name is the first type_identifier
            name_node = next((c for c in _children(n) if _kind(c) == "type_identifier"), None)
            if name_node is not None:
                symbols.append(("typedef", _text(src, name_node), _line(n), _signature(src, n)))
                def_spans.add(_attr(name_node, "start_byte"))
        elif k in ("static_final_declaration_list", "initialized_identifier_list") and parent_kind == "program":
            # Top-level const/final/var (Riverpod providers, theme constants, etc.).
            # The same node kinds nest under `declaration` for class fields, which
            # the parent_kind=='program' guard excludes.
            decls = [c for c in _children(n)
                     if _kind(c) in ("static_final_declaration", "initialized_identifier")]
            names = [next((g for g in _children(d) if _kind(g) == "identifier"), None) for d in decls]
            for d, nm in zip(decls, names):
                if nm is not None:
                    symbols.append(("variable", _text(src, nm), _line(d), _signature(src, d)))
                    def_spans.add(_attr(nm, "start_byte"))
            # Attribute the initializer's calls to the variable when it's the only
            # one declared (`final x = Provider(...)` -> x calls Provider).
            if len(names) == 1 and names[0] is not None:
                new_enc = _text(src, names[0])
        elif k == "enum_constant":
            name_node = next((c for c in _children(n) if _kind(c) == "identifier"), None)
            if name_node is not None:
                symbols.append(("enum_member", _text(src, name_node), _line(n), _signature(src, n)))
                def_spans.add(_attr(name_node, "start_byte"))
        # --- call sites, all read from a node's direct children -------------
        #   bare call    Foo(...)      -> identifier + selector(argument_part)
        #   method call  obj.foo(...)  -> selector(.foo) + selector(argument_part)
        #   cascade      obj..foo(...) -> cascade_section{ cascade_selector + argument_part }
        # A `selector` only carries args when its first child is `argument_part`
        # (a `.name` field access is an unconditional_assignable_selector instead),
        # which is what tells `obj.foo(...)` (call) apart from `obj.foo` (read).
        kids = _children(n)
        if k == "cascade_section" and any(_kind(c) == "argument_part" for c in kids):
            csel = next((c for c in kids if _kind(c) == "cascade_selector"), None)
            nm = next((g for g in _children(csel) if _kind(g) == "identifier"), None) if csel else None
            if nm is not None:
                calls.append((enc, _text(src, nm), _line(nm)))
        # Look at each child together with its next sibling: a call is any
        # child immediately followed by an argument-carrying selector.
        for i, c in enumerate(kids):
            nxt = kids[i + 1] if i + 1 < len(kids) else None
            sc = _children(nxt) if nxt is not None and _kind(nxt) == "selector" else None
            if not (sc and _kind(sc[0]) == "argument_part"):
                continue
            name_node = None
            if _kind(c) in _DART_IDENT_KINDS:                    # Foo(...), setState(...)
                name_node = c
            elif _kind(c) == "selector":                         # obj.foo(...): name is in the
                uas = _children(c)[0] if _children(c) else None  # preceding .foo selector
                if uas is not None and _kind(uas) == "unconditional_assignable_selector":
                    name_node = next((g for g in _children(uas) if _kind(g) == "identifier"), None)
            if name_node is not None:
                calls.append((enc, _text(src, name_node), _line(name_node)))
        if k in _DART_IDENT_KINDS and _attr(n, "start_byte") not in def_spans:
            refs.add(_text(src, n))
        # A function/method body is a *sibling* of its signature in Dart, so its
        # calls would otherwise be attributed to the enclosing class (or to None
        # at top level). Pair each signature with the following body so calls
        # inside are attributed to that function/method.
        child_enc = {}
        pending = None  # name from the most recent signature still awaiting its body
        for i, c in enumerate(kids):
            ck = _kind(c)
            if ck == "function_signature":
                nm = _field(c, "name")
                pending = _text(src, nm) if nm is not None else None
            elif ck in ("method_signature", "declaration"):
                pending = _dart_member_symbol(c, src)[1]
            elif ck == "function_body":
                if pending is not None:
                    child_enc[i] = pending
                pending = None
        # Children without a paired signature inherit this node's enclosing name.
        for i, c in enumerate(kids):
            stack.append((c, k, child_enc.get(i, new_enc)))
    return symbols, _dart_imports(root, src), calls, sorted(refs)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# --- import resolution (raw import -> repo-relative path) --------------------
|
|
544
|
+
|
|
545
|
+
def _resolve_python(level, module, importer_rel, known: set[str]):
    """Resolve one python import to a repo-relative path in ``known``, or None.

    ``level`` is the number of leading dots (0 for an absolute import) and
    ``module`` the dotted name after them. Relative imports are resolved
    against the importer's directory and must match exactly. Absolute imports
    may also match as a path suffix of a known file.
    """
    segments = [seg for seg in module.split(".") if seg]
    if level <= 0:
        return _first_existing(["/".join(segments)], known, suffix_ok=True)
    package_dir = posixpath.dirname(importer_rel)
    ancestors = package_dir.split("/") if package_dir else []
    # level 1 = importer's own package; each extra dot climbs one more.
    kept = len(ancestors) - (level - 1)
    if kept < 0:
        return None
    return _first_existing(["/".join(ancestors[:kept] + segments)], known)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def _first_existing(stems, known: set[str], suffix_ok: bool = False):
|
|
561
|
+
for stem in stems:
|
|
562
|
+
cands = [stem + ".py", stem + "/__init__.py"]
|
|
563
|
+
for c in cands:
|
|
564
|
+
if c in known:
|
|
565
|
+
return c
|
|
566
|
+
if suffix_ok:
|
|
567
|
+
for c in cands:
|
|
568
|
+
tail = "/" + c
|
|
569
|
+
hit = next((k for k in known if k.endswith(tail)), None)
|
|
570
|
+
if hit:
|
|
571
|
+
return hit
|
|
572
|
+
return None
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
_JS_EXTS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _resolve_fs(target: str, known: set[str]):
|
|
579
|
+
"""Resolve a repo-relative module path (no extension) to a real file, trying
|
|
580
|
+
JS/TS extensions and an index file in a directory."""
|
|
581
|
+
cands = [target] + [target + e for e in _JS_EXTS]
|
|
582
|
+
cands += [target + "/index" + e for e in _JS_EXTS]
|
|
583
|
+
for c in cands:
|
|
584
|
+
if c in known:
|
|
585
|
+
return c
|
|
586
|
+
return None
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def _resolve_js(source, importer_rel, known: set[str]):
    """Resolve a relative JS/TS import against the importer's directory.

    Returns None for any specifier that does not start with "."; path aliases
    are handled separately.
    """
    if source.startswith("."):
        base_dir = posixpath.dirname(importer_rel)
        joined = posixpath.join(base_dir, source)
        return _resolve_fs(posixpath.normpath(joined), known)
    return None  # relative imports only; aliases handled separately
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _resolve_dart(uri: str, importer_rel: str, known: set[str], pkg_roots: dict):
|
|
598
|
+
"""Resolve a Dart import URI to a repo-relative file.
|
|
599
|
+
'dart:...' -> SDK, no edge.
|
|
600
|
+
'package:p/sub' -> p's pubspec dir + '/lib/' + sub, when p is a local package.
|
|
601
|
+
anything else -> a path relative to the importer. Dart treats a bare
|
|
602
|
+
'src/x.dart' as relative (unlike a JS bare specifier,
|
|
603
|
+
which means node_modules)."""
|
|
604
|
+
if uri.startswith("dart:"):
|
|
605
|
+
return None
|
|
606
|
+
if uri.startswith("package:"):
|
|
607
|
+
pkg, _, sub = uri[len("package:"):].partition("/")
|
|
608
|
+
base = pkg_roots.get(pkg)
|
|
609
|
+
if base is None or not sub:
|
|
610
|
+
return None
|
|
611
|
+
target = posixpath.normpath(posixpath.join(base, "lib", sub))
|
|
612
|
+
return target if target in known else None
|
|
613
|
+
importer_dir = posixpath.dirname(importer_rel)
|
|
614
|
+
target = posixpath.normpath(posixpath.join(importer_dir, uri))
|
|
615
|
+
return target if target in known else None
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def resolve_imports(lang, imports, importer_rel, known: set[str], alias_configs=None, dart_pkgs=None) -> dict:
    """Resolve raw imports to a ``{dst_path: kind}`` map.

    ``kind`` is 'type' only when EVERY import resolving to that target is
    type-only; a single runtime import makes the edge 'import', since the
    target is then loaded at runtime. Dart edges are always 'import'.
    Unresolved imports and self-imports produce no edge.
    """
    family = _base_lang(lang)
    if family == "dart":
        packages = dart_pkgs or {}
        dart_edges = {}
        for uri in imports:
            target = _resolve_dart(uri, importer_rel, known, packages)
            if target and target != importer_rel:
                dart_edges[target] = "import"
        return dart_edges

    aliases = alias_configs or []
    runtime, type_only = set(), set()
    for entry in imports:
        if family == "python":
            level, module = entry
            is_type = False
            target = _resolve_python(level, module, importer_rel, known)
        else:
            source, is_type = entry
            if source.startswith("."):
                target = _resolve_js(source, importer_rel, known)
            else:
                # bare import: try tsconfig/jsconfig path aliases (@/..., ~/..., etc.)
                target = None
                for candidate in tsconfig.expand(source, importer_rel, aliases):
                    target = _resolve_fs(candidate, known)
                    if target:
                        break
        if not target or target == importer_rel:
            continue
        bucket = type_only if is_type else runtime
        bucket.add(target)

    edges = dict.fromkeys(runtime, "import")
    for target in type_only:
        # A target that is also imported as a value keeps its 'import' kind.
        if target not in edges:
            edges[target] = "type"
    return edges
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
# --- framework entrypoints (loaded by tooling, not by import) ----------------
|
|
656
|
+
|
|
657
|
+
# A source-file token inside a package.json script command, e.g. the
|
|
658
|
+
# `src/database/seeder.ts` in `ts-node ... src/database/seeder.ts`.
|
|
659
|
+
_SCRIPT_FILE_RE = re.compile(r"[\w./@-]+\.(?:ts|tsx|js|jsx|mjs|cjs)\b")


def script_entrypoints(config, known: set[str]) -> set[str]:
    """Repo-relative source files invoked by a package.json `scripts` command.

    These are run by tooling (`npm run seed` -> `ts-node src/database/seeder.ts`),
    not imported by other code, so the dependency graph never sees an edge into
    them — without this they masquerade as dead code in orphans().

    Every ``package.json`` in ``known`` is read from ``config.root``. File
    tokens in its script commands are resolved relative to that package.json's
    directory and kept only if they are in ``known``. A package.json that
    cannot be read, is not valid JSON, or whose top-level value is not an
    object is skipped.
    """
    out: set[str] = set()
    for rel in known:
        if posixpath.basename(rel) != "package.json":
            continue
        try:
            data = json.loads((config.root / rel).read_text(encoding="utf-8", errors="ignore"))
        except (OSError, ValueError, RecursionError):
            # Unreadable file, malformed JSON (JSONDecodeError is a ValueError)
            # or pathologically nested JSON: best-effort, so skip the file.
            continue
        # Valid JSON need not be an object; a list or scalar has no .get().
        scripts = data.get("scripts") if isinstance(data, dict) else None
        if not isinstance(scripts, dict):
            continue
        pkg_dir = posixpath.dirname(rel)
        for cmd in scripts.values():
            if not isinstance(cmd, str):
                continue
            for token in _SCRIPT_FILE_RE.findall(cmd):
                target = posixpath.normpath(posixpath.join(pkg_dir, token))
                if target in known:
                    out.add(target)
    return out
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
# A pubspec.yaml top-level `name:` value (the Dart package name). Matched without
# a YAML parser since we only need this one scalar field.
_DART_PKG_NAME_RE = re.compile(r"(?m)^name:[ \t]*['\"]?([A-Za-z_][A-Za-z0-9_]*)")


def dart_package_roots(config, known: set[str]) -> dict:
    """Map each Dart package name to the repo-relative dir holding its pubspec.yaml,
    so `package:<name>/x.dart` imports resolve to `<dir>/lib/x.dart`. A Flutter
    monorepo/polyrepo can declare several packages, hence a map, not one name.

    Args:
        config: object whose ``root`` is the repository root path.
        known: repo-relative POSIX paths of every known file.

    Returns:
        ``{package_name: directory}``. Unreadable pubspecs and pubspecs without
        a column-0 ``name:`` line are skipped. If several pubspecs declare the
        same name, the shallowest one wins (ties broken by path), so the result
        does not depend on set iteration order.
    """
    # Sort only the pubspec paths (not all of `known`) to keep this cheap.
    pubspecs = sorted(
        (rel for rel in known if posixpath.basename(rel) == "pubspec.yaml"),
        key=lambda rel: (rel.count("/"), rel),
    )
    roots: dict[str, str] = {}
    for rel in pubspecs:
        try:
            text = (config.root / rel).read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        match = _DART_PKG_NAME_RE.search(text)
        if match:
            # setdefault: the first (shallowest) declaration of a name is kept.
            roots.setdefault(match.group(1), posixpath.dirname(rel))
    return roots
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
# --- reindex (apply changes to the DB) --------------------------------------
|
|
714
|
+
|
|
715
|
+
def _index_one(config, conn, rel, file_hash_val, known, alias_configs, stamp, src=None, dart_pkgs=None):
|
|
716
|
+
"""Index a single file: store its row, symbols, and dependency edges."""
|
|
717
|
+
abs_path = config.root / rel
|
|
718
|
+
if src is None:
|
|
719
|
+
try:
|
|
720
|
+
src = abs_path.read_bytes()
|
|
721
|
+
except OSError:
|
|
722
|
+
return
|
|
723
|
+
lang = config.lang_for(rel)
|
|
724
|
+
stat = abs_path.stat()
|
|
725
|
+
db.upsert_file(conn, rel, lang, file_hash_val, stat.st_mtime, stat.st_size, stamp)
|
|
726
|
+
symbols, imports, calls, refs = ([], [], [], [])
|
|
727
|
+
if lang:
|
|
728
|
+
symbols, imports, calls, refs = extract(lang, src)
|
|
729
|
+
db.replace_symbols(conn, rel, symbols)
|
|
730
|
+
db.replace_edges(
|
|
731
|
+
conn,
|
|
732
|
+
rel,
|
|
733
|
+
resolve_imports(lang, imports, rel, known, alias_configs, dart_pkgs) if lang else [],
|
|
734
|
+
)
|
|
735
|
+
db.replace_calls(conn, rel, calls)
|
|
736
|
+
db.replace_refs(conn, rel, refs)
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
def reindex(config: cfg.Config, conn, paths: list[str] | None = None, force: bool = False) -> dict:
    """Synchronise the index with the current state of the working tree.

    Normally only new, changed and deleted files are processed. With
    ``force=True`` every file on disk is re-extracted instead (useful after an
    extractor upgrade; summaries/notes/embeddings are preserved). When ``paths``
    is given, indexing is limited to the selected files that also appear in it.
    Deleted files are always forgotten.

    Returns a dict of counters: ``indexed``, ``new``, ``changed``, ``deleted``
    and ``total_files``.
    """
    on_disk = disk_state(config)
    known = set(on_disk)
    delta = diff(conn, on_disk)
    alias_configs = tsconfig.load_alias_configs(config)
    dart_pkgs = dart_package_roots(config, known)

    if force:
        targets = sorted(known)
    else:
        targets = delta["new"] + delta["changed"]
    if paths is not None:
        requested = set(paths)
        targets = [rel for rel in targets if rel in requested]

    stamp = now_iso()
    for rel in targets:
        _index_one(config, conn, rel, on_disk[rel], known, alias_configs, stamp, dart_pkgs=dart_pkgs)

    for gone in delta["deleted"]:
        db.forget_file(conn, gone)

    # Cache the package.json script entrypoints so orphans() does not report
    # them as dead. The tree was already walked above, so this costs little.
    entrypoints_json = json.dumps(sorted(script_entrypoints(config, known)))
    conn.execute(
        "INSERT INTO meta(key,value) VALUES('script_entrypoints',?) "
        "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
        (entrypoints_json,),
    )
    conn.execute(
        "INSERT INTO meta(key,value) VALUES('last_reindex',?) "
        "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
        (stamp,),
    )
    conn.commit()

    change_counts = {bucket: len(delta[bucket]) for bucket in ("new", "changed", "deleted")}
    return {"indexed": len(targets), **change_counts, "total_files": len(known)}
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def reindex_paths(config: cfg.Config, conn, rels: list[str]) -> dict:
    """Incrementally reindex specific files WITHOUT walking/hashing the whole tree.
    Used by the post-edit hook so a single save stays cheap on large monorepos.
    Edges resolve against the already-indexed file set plus the requested paths
    that still exist on disk.

    Args:
        config: project configuration (root path, ignore rules).
        conn: database connection; committed before returning.
        rels: repo-relative paths to refresh.

    Returns:
        ``{"touched": <files re-indexed or forgotten>, "files": len(rels)}``.
        Paths that no longer exist are forgotten. Ignored, unreadable, or
        unchanged (same SHA-1) paths are skipped.
    """
    alias_configs = tsconfig.load_alias_configs(config)

    # Decide up front which requested paths are gone. They must not stay in
    # `known`, or other files in this batch could resolve import edges to a
    # file that is being forgotten in the same call.
    missing = {rel for rel in rels if not (config.root / rel).exists()}
    known = set(db.stored_hashes(conn))
    known.update(rels)
    known -= missing

    dart_pkgs = dart_package_roots(config, known)
    stamp = now_iso()
    touched = 0
    for rel in rels:
        if rel in missing:
            db.forget_file(conn, rel)
            touched += 1
            continue
        abs_path = config.root / rel
        if config.is_ignored(abs_path):
            continue
        try:
            src = abs_path.read_bytes()
        except OSError:
            # Vanished or unreadable since the existence check: skip it.
            continue
        h = hashlib.sha1(src).hexdigest()
        prev = conn.execute("SELECT hash FROM files WHERE path=?", (rel,)).fetchone()
        if prev and prev["hash"] == h:
            continue  # content unchanged since it was last indexed
        _index_one(config, conn, rel, h, known, alias_configs, stamp, src=src, dart_pkgs=dart_pkgs)
        touched += 1
    # Editing a package.json can change which files are script entrypoints; refresh
    # the cached set so orphans() stays accurate without needing a full reindex.
    if any(posixpath.basename(r) == "package.json" for r in rels):
        conn.execute(
            "INSERT INTO meta(key,value) VALUES('script_entrypoints',?) "
            "ON CONFLICT(key) DO UPDATE SET value=excluded.value",
            (json.dumps(sorted(script_entrypoints(config, known))),),
        )
    conn.commit()
    return {"touched": touched, "files": len(rels)}
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def _to_rel(config, arg: str) -> str | None:
|
|
824
|
+
p = Path(arg)
|
|
825
|
+
if p.is_absolute():
|
|
826
|
+
try:
|
|
827
|
+
return p.resolve().relative_to(config.root.resolve()).as_posix()
|
|
828
|
+
except ValueError:
|
|
829
|
+
return None
|
|
830
|
+
return posixpath.normpath(arg)
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def main():  # `cerebro-index` entry point; with file args, does an incremental update
    """Run the indexer from the command line and print a JSON summary.

    With no path arguments a full reindex runs; ``--force`` makes it re-extract
    every file. With path arguments only those files are refreshed
    incrementally; ``--force`` has no effect in that mode, and arguments that
    fall outside the project root are dropped.
    """
    import sys

    config = cfg.Config.load()
    conn = db.connect(config.db_path)
    try:
        args = sys.argv[1:]
        force = "--force" in args
        args = [a for a in args if a != "--force"]
        if args:
            rels = [r for r in (_to_rel(config, a) for a in args) if r]
            result = reindex_paths(config, conn, rels)
            result["mode"] = "incremental"
        else:
            result = reindex(config, conn, force=force)
            result["mode"] = "full-force" if force else "full"
    finally:
        # Release the database handle even if indexing raised.
        # NOTE(review): assumes db.connect returns a DB-API style connection
        # with close() (it is already used via execute()/commit()).
        conn.close()
    result["root"] = str(config.root)
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
|