neuroloom-codeweaver 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+
2
+ web/node_modules/
3
+
4
+ web/dist/
5
+
6
+ # Video (Remotion)
7
+ video/node_modules/
8
+ video/out/
9
+
10
+ *.tsbuildinfo
11
+
12
+ *.pyc
13
+
14
+ # Secrets
15
+ .env
16
+ .env.*
17
+ !.env.example
18
+
19
+ .mc/notes.md
20
+ .claude/agent-memory/
21
+
22
+ # Marketing site
23
+ marketing/node_modules/
24
+ marketing/.next/
25
+ marketing/.source/
26
+ marketing/out/
27
+
28
+ web/.claude/agent-memory/
29
+
30
+ mcp/.claude/agent-memory/
31
+
32
+ # Plugin (separate repo with its own git history)
33
+ neuroloom-claude-plugin/
34
+
35
+ neuroloom-sdlc-plugin/
36
+
37
+ api/.claude/agent-memory/
38
+
39
+ video/.claude/agent-memory/
40
+
41
+ .idea/
42
+
43
+ .neuroloom/
44
+
45
+ marketing/.claude/agent-memory/
46
+
47
+ evals/longmemeval/predictions.jsonl
48
+
49
+ evals/longmemeval/.claude/agent-memory/
50
+
51
+ evals/longmemeval/results.json
52
+
53
+ docs/.obsidian/
54
+
55
+ evals/codememeval/results.jsonl
56
+
57
+ evals/eval_common/.claude/agent-memory/
58
+
59
+ evals/codememeval/.claude/agent-memory/
60
+
61
+ evals/codememeval/metrics_summary.json
62
+
63
+ evals/longmemeval/results_run1.json
64
+
65
+ evals/longmemeval/predictions_run1.jsonl
66
+
67
+ marketing/next-env.d.ts
68
+
69
+ codeweaver/dist/
70
+ dist/
71
+
72
+ # Python caches
73
+ __pycache__/
74
+ .mypy_cache/
75
+ .ruff_cache/
@@ -0,0 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: neuroloom-codeweaver
3
+ Version: 0.1.0
4
+ Summary: Client-side Tree-sitter parser for Neuroloom code graph extraction
5
+ Project-URL: Homepage, https://neuroloom.dev
6
+ Project-URL: Documentation, https://neuroloom.dev/docs
7
+ Project-URL: Source, https://github.com/endless-galaxy-studios/neuroloom
8
+ License: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: tree-sitter-python>=0.23.0
18
+ Requires-Dist: tree-sitter-typescript>=0.23.0
19
+ Requires-Dist: tree-sitter>=0.25.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: mypy>=1.14.0; extra == 'dev'
22
+ Requires-Dist: ruff>=0.9.0; extra == 'dev'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # neuroloom-codeweaver
26
+
27
+ Client-side Tree-sitter parser for Neuroloom code graph extraction.
28
+
29
+ ## Install
30
+
31
+ ```bash
32
+ pip install neuroloom-codeweaver
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ```python
38
+ from pathlib import Path
39
+ from codeweaver import discover_files, parse_files
40
+ # Discover source files under the project root
41
+ root = Path("/path/to/project")
42
+ files = discover_files(root)
43
+ # Parse them into a payload of symbols and edges; paths are recorded relative to the root
44
+ payload = parse_files(files, workspace_root=root)
45
+ ```
46
+
47
+ This package is intentionally standalone — it has no dependency on any Neuroloom server package.
48
+ Source code is parsed locally; only structural metadata (names, types, line ranges, relationships)
49
+ is produced.
@@ -0,0 +1,25 @@
1
+ # neuroloom-codeweaver
2
+
3
+ Client-side Tree-sitter parser for Neuroloom code graph extraction.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install neuroloom-codeweaver
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from pathlib import Path
15
+ from codeweaver import discover_files, parse_files
16
+ # Discover source files under the project root
17
+ root = Path("/path/to/project")
18
+ files = discover_files(root)
19
+ # Parse them into a payload of symbols and edges; paths are recorded relative to the root
20
+ payload = parse_files(files, workspace_root=root)
21
+ ```
22
+
23
+ This package is intentionally standalone — it has no dependency on any Neuroloom server package.
24
+ Source code is parsed locally; only structural metadata (names, types, line ranges, relationships)
25
+ is produced.
@@ -0,0 +1,5 @@
1
+ """codeweaver — client-side Tree-sitter parser for Neuroloom code graph extraction."""
2
+
3
+ from codeweaver.parser import discover_files, parse_files
4
+
5
+ __all__ = ["discover_files", "parse_files"]
@@ -0,0 +1,874 @@
1
+ """
2
+ Client-side Tree-sitter parser for the Neuroloom code graph.
3
+
4
+ Extracts structural metadata (symbols, call edges, import edges) from
5
+ TypeScript and Python source files. No source code is included in the
6
+ output — only names, types, line ranges, and structural relationships.
7
+
8
+ The parser runs client-side (in the MCP server process) so that:
9
+ 1. No source code crosses the network boundary
10
+ 2. Parsing uses the agent's local file system access
11
+ 3. The API receives only structural metadata
12
+
13
+ # Usage:
14
+ # from pathlib import Path
15
+ # from codeweaver import discover_files, parse_files
16
+ # files = discover_files(Path("/path/to/project"))
17
+ # payload = parse_files(files, workspace_root=Path("/path/to/project"))
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ from dataclasses import dataclass, field
24
+ from pathlib import Path
25
+ from typing import Any, cast
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
# Directories to skip — build artifacts and vendored code.
# discover_files() drops any file whose path contains one of these names
# as a component.
_SKIP_DIRS = frozenset({
    "node_modules", ".next", ".nuxt", "dist", "build", "out",
    ".turbo", ".cache", "__pycache__", ".git", "coverage",
    ".nyc_output", ".venv", "venv", "env", ".env",
    ".mypy_cache", ".ruff_cache", ".pytest_cache",
})

# File extensions to language mapping. The keys double as the default
# extension filter for discover_files(); the values are the language names
# accepted by parse_file().
_EXTENSION_MAP: dict[str, str] = {
    ".ts": "typescript",
    ".tsx": "typescript",
    ".py": "python",
}

# NOTE(review): not referenced anywhere in this module; presumably a batch
# size used by a caller when uploading results — confirm before relying on it.
_BATCH_SIZE = 500
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Data containers
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
@dataclass
class Symbol:
    """Extracted code symbol.

    Holds structural metadata only (identity, kind, location); no source
    text is stored.
    """
    # Identifier of the form "{file_path}::{name}" (see _qualified).
    qualified_name: str
    # Symbol name; methods are recorded as "ClassName.method" and the
    # per-file module anchor as "MODULE".
    name: str
    # Workspace-relative path of the file that declares the symbol.
    file_path: str
    symbol_type: str  # "function", "class", "module"
    # 1-based first and last line of the declaration; the parsers leave
    # both as None for the synthetic MODULE symbol.
    line_start: int | None = None
    line_end: int | None = None
    # "typescript" or "python" when set by the parsers in this module.
    language: str | None = None
    # Free-form extra data; the parsers in this module leave it empty.
    metadata: dict[str, Any] = field(default_factory=dict)
63
+
64
+
65
@dataclass
class Edge:
    """A structural edge between two symbols."""
    # Qualified name of the symbol the edge starts from (the calling
    # function, or the file's MODULE symbol for imports).
    source_qualified_name: str
    # Qualified name of the target symbol. For import edges produced by the
    # parsers this is the raw module specifier, not a resolved symbol.
    target_qualified_name: str
    edge_type: str  # "calls", "imports", "inherits"
    # Free-form extra data; the parsers in this module leave it empty.
    metadata: dict[str, Any] = field(default_factory=dict)
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Tree-sitter lazy import
76
+ # ---------------------------------------------------------------------------
77
+
78
+ def _get_ts_parser(language: str, is_tsx: bool = False) -> Any:
79
+ """Lazily import tree-sitter and return a configured Parser.
80
+
81
+ Raises ImportError with a clear message if tree-sitter is not installed.
82
+ """
83
+ try:
84
+ from tree_sitter import Language, Parser
85
+ except ImportError:
86
+ raise ImportError(
87
+ "tree-sitter is required for code_sync. Install with: "
88
+ "pip install neuroloom-codeweaver"
89
+ )
90
+
91
+ if language == "typescript":
92
+ try:
93
+ import tree_sitter_typescript as ts_typescript
94
+ except ImportError:
95
+ raise ImportError(
96
+ "tree-sitter-typescript is required for TypeScript parsing. "
97
+ "Install with: pip install neuroloom-codeweaver"
98
+ )
99
+ if is_tsx:
100
+ lang = Language(ts_typescript.language_tsx())
101
+ else:
102
+ lang = Language(ts_typescript.language_typescript())
103
+ return Parser(lang)
104
+
105
+ elif language == "python":
106
+ try:
107
+ import tree_sitter_python as ts_python
108
+ except ImportError:
109
+ raise ImportError(
110
+ "tree-sitter-python is required for Python parsing. "
111
+ "Install with: pip install neuroloom-codeweaver"
112
+ )
113
+ lang = Language(ts_python.language())
114
+ return Parser(lang)
115
+
116
+ else:
117
+ raise ValueError(f"Unsupported language: {language}")
118
+
119
+
120
+ def _qualified(rel_path: str, name: str) -> str:
121
+ """Build qualified_name: '{rel_path}::{name}'."""
122
+ return f"{rel_path}::{name}"
123
+
124
+
125
+ def _decode_node_text(node: Any) -> str | None:
126
+ """Decode a tree-sitter node's text bytes to str."""
127
+ raw = node.text
128
+ if raw is None:
129
+ return None
130
+ return cast(str, raw.decode("utf-8", errors="replace"))
131
+
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # TypeScript parsing
135
+ # ---------------------------------------------------------------------------
136
+
137
+
138
def _parse_typescript(
    source: bytes,
    rel_path: str,
    is_tsx: bool,
) -> tuple[list[Symbol], list[Edge], list[Edge]]:
    """Extract symbols, call edges and import edges from TypeScript/TSX source.

    Args:
        source: Raw file contents.
        rel_path: Workspace-relative path, used as the prefix of every
            qualified name.
        is_tsx: Parse with the TSX grammar instead of plain TypeScript.

    Returns:
        ``(symbols, call_edges, import_edges)``. ``symbols`` always starts
        with a synthetic "MODULE" symbol for the file. Call edges are only
        emitted for plain-identifier callees that name a symbol declared in
        this same file. Import edges run from the MODULE symbol to the raw
        (unquoted) module specifier.
    """
    parser = _get_ts_parser("typescript", is_tsx=is_tsx)
    tree = parser.parse(source)

    # The MODULE symbol anchors import edges and calls made at module level.
    module_qname = _qualified(rel_path, "MODULE")
    symbols: list[Symbol] = [
        Symbol(
            qualified_name=module_qname,
            name="MODULE",
            file_path=rel_path,
            symbol_type="module",
            language="typescript",
        )
    ]
    call_edges: list[Edge] = []
    import_edges: list[Edge] = []

    def _field_text(node: Any, field_name: str) -> str | None:
        """Decoded text of a named child field, or None if absent."""
        child = node.child_by_field_name(field_name)
        return None if child is None else _decode_node_text(child)

    def _record(name: str, node: Any, kind: str) -> None:
        """Append a symbol spanning *node* (tree-sitter rows are 0-based)."""
        symbols.append(Symbol(
            qualified_name=_qualified(rel_path, name),
            name=name,
            file_path=rel_path,
            symbol_type=kind,
            line_start=node.start_point[0] + 1,
            line_end=node.end_point[0] + 1,
            language="typescript",
        ))

    # ---- Pass 1: declared symbols -------------------------------------
    def _collect(node: Any, owner: str | None) -> None:
        """Record declarations under *node*; *owner* is the enclosing class."""
        kind = node.type

        if kind == "function_declaration":
            fn_name = _field_text(node, "name")
            if fn_name is not None:
                _record(fn_name, node, "function")

        elif kind == "class_declaration":
            cls_name = _field_text(node, "name")
            if cls_name is not None:
                _record(cls_name, node, "class")
                # Members are visited with this class as their owner.
                for member in node.children:
                    _collect(member, cls_name)
                return

        elif kind == "method_definition":
            method_name = _field_text(node, "name")
            if method_name is not None:
                _record(f"{owner}.{method_name}" if owner else method_name, node, "function")

        elif kind == "arrow_function":
            # Only arrow functions bound directly to a variable get a name.
            holder = node.parent
            if holder is not None and holder.type == "variable_declarator":
                var_name = _field_text(holder, "name")
                if var_name is not None:
                    _record(var_name, node, "function")

        for child in node.children:
            _collect(child, owner)

    _collect(tree.root_node, None)

    # Name -> qualified name for every symbol declared in this file.
    local_name_index: dict[str, str] = {sym.name: sym.qualified_name for sym in symbols}

    # ---- Pass 2: call and import edges --------------------------------
    def _enclosing_class(node: Any) -> str | None:
        """Name of the nearest class_declaration ancestor, if any."""
        cursor = node.parent
        while cursor is not None:
            if cursor.type == "class_declaration":
                label = cursor.child_by_field_name("name")
                return None if label is None else _decode_node_text(label)
            cursor = cursor.parent
        return None

    def _scope_qname(node: Any) -> str | None:
        """Qualified name of a function-like node, or None if it is anonymous."""
        kind = node.type
        if kind == "function_declaration":
            bare = _field_text(node, "name")
            return None if bare is None else _qualified(rel_path, bare)
        if kind == "method_definition":
            bare = _field_text(node, "name")
            if bare is None:
                return None
            cls = _enclosing_class(node)
            return _qualified(rel_path, f"{cls}.{bare}" if cls else bare)
        # arrow_function
        holder = node.parent
        if holder is None or holder.type != "variable_declarator":
            return None
        bare = _field_text(holder, "name")
        return None if bare is None else _qualified(rel_path, bare)

    def _walk(node: Any, scope: str) -> None:
        """Emit edges under *node*; *scope* is the enclosing function's qname."""
        kind = node.type

        if kind in ("function_declaration", "method_definition", "arrow_function"):
            inner = _scope_qname(node)
            if inner is not None:
                for child in node.children:
                    _walk(child, inner)
                return
            # Anonymous function: its calls stay attributed to the outer scope.

        elif kind == "call_expression":
            callee = node.child_by_field_name("function")
            if callee is not None and callee.type == "identifier":
                callee_name = _decode_node_text(callee)
                if callee_name is not None and callee_name in local_name_index:
                    call_edges.append(Edge(
                        source_qualified_name=scope,
                        target_qualified_name=local_name_index[callee_name],
                        edge_type="calls",
                    ))

        elif kind == "import_statement":
            specifier = _field_text(node, "source")
            if specifier is not None:
                import_edges.append(Edge(
                    source_qualified_name=module_qname,
                    target_qualified_name=specifier.strip("\"'"),  # resolved post-parse
                    edge_type="imports",
                ))

        for child in node.children:
            _walk(child, scope)

    _walk(tree.root_node, module_qname)

    return symbols, call_edges, import_edges
326
+
327
+
328
+ # ---------------------------------------------------------------------------
329
+ # Python parsing
330
+ # ---------------------------------------------------------------------------
331
+
332
+
333
def _parse_python(
    source: bytes,
    rel_path: str,
) -> tuple[list[Symbol], list[Edge], list[Edge]]:
    """Parse Python source, returning symbols, call edges, import edges.

    Args:
        source: Raw file contents.
        rel_path: Workspace-relative path, used as the prefix of every
            qualified name.

    Returns:
        ``(symbols, call_edges, import_edges)``. ``symbols`` always starts
        with a synthetic "MODULE" symbol for the file. Functions defined
        inside a class are named "ClassName.func". Call edges are only
        emitted for plain-identifier callees that name a symbol declared in
        this same file. Import edges run from the MODULE symbol to the module
        name exactly as written in the source.
    """
    parser = _get_ts_parser("python")
    tree = parser.parse(source)
    root = tree.root_node

    symbols: list[Symbol] = []
    call_edges: list[Edge] = []
    import_edges: list[Edge] = []

    # MODULE symbol — anchor for import edges and module-level calls
    module_qname = _qualified(rel_path, "MODULE")
    symbols.append(Symbol(
        qualified_name=module_qname,
        name="MODULE",
        file_path=rel_path,
        symbol_type="module",
        language="python",
    ))

    # Pass 1: collect declared symbols
    active_class: str | None = None

    def _visit_symbols(node: Any) -> None:
        nonlocal active_class
        ntype = node.type

        if ntype == "function_definition":
            name_node = node.child_by_field_name("name")
            if name_node is not None:
                name = _decode_node_text(name_node)
                if name is not None:
                    full_name = f"{active_class}.{name}" if active_class else name
                    symbols.append(Symbol(
                        qualified_name=_qualified(rel_path, full_name),
                        name=full_name,
                        file_path=rel_path,
                        symbol_type="function",
                        line_start=node.start_point[0] + 1,  # rows are 0-based
                        line_end=node.end_point[0] + 1,
                        language="python",
                    ))

        elif ntype == "class_definition":
            name_node = node.child_by_field_name("name")
            if name_node is not None:
                name = _decode_node_text(name_node)
                if name is not None:
                    symbols.append(Symbol(
                        qualified_name=_qualified(rel_path, name),
                        name=name,
                        file_path=rel_path,
                        symbol_type="class",
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        language="python",
                    ))
                    # Visit the class body with this class as the owner,
                    # then restore the outer owner.
                    prev = active_class
                    active_class = name
                    for child in node.children:
                        _visit_symbols(child)
                    active_class = prev
                    return

        for child in node.children:
            _visit_symbols(child)

    _visit_symbols(root)

    # Name -> qualified name for every symbol declared in this file.
    local_name_index: dict[str, str] = {s.name: s.qualified_name for s in symbols}

    # Pass 2: collect call and import edges
    current_fn_qname: str = module_qname

    def _derive_class_name(node: Any) -> str | None:
        """Return the name of the nearest enclosing class_definition, if any."""
        ancestor = node.parent
        while ancestor is not None:
            if ancestor.type == "class_definition":
                cn = ancestor.child_by_field_name("name")
                if cn is not None:
                    return _decode_node_text(cn)
                return None
            ancestor = ancestor.parent
        return None

    def _add_import(module_name: str | None) -> None:
        """Record an import edge from this module to *module_name*."""
        if module_name is not None:
            import_edges.append(Edge(
                source_qualified_name=module_qname,
                target_qualified_name=module_name,
                edge_type="imports",
            ))

    def _visit_edges(node: Any) -> None:
        nonlocal current_fn_qname
        ntype = node.type

        if ntype == "function_definition":
            name_node = node.child_by_field_name("name")
            if name_node is not None:
                raw = _decode_node_text(name_node)
                if raw is not None:
                    cls = _derive_class_name(node)
                    full = f"{cls}.{raw}" if cls else raw
                    enclosing = _qualified(rel_path, full)
                    # Calls inside the body are attributed to this function.
                    prev = current_fn_qname
                    current_fn_qname = enclosing
                    for child in node.children:
                        _visit_edges(child)
                    current_fn_qname = prev
                    return

        elif ntype == "call":
            fn_node = node.child_by_field_name("function")
            if fn_node is not None and fn_node.type == "identifier":
                callee_name = _decode_node_text(fn_node)
                if callee_name is not None and callee_name in local_name_index:
                    call_edges.append(Edge(
                        source_qualified_name=current_fn_qname,
                        target_qualified_name=local_name_index[callee_name],
                        edge_type="calls",
                    ))

        elif ntype == "import_from_statement":
            module_node = node.child_by_field_name("module_name")
            if module_node is not None:
                _add_import(_decode_node_text(module_node))

        elif ntype == "import_statement":
            for child in node.children:
                target = child
                if child.type == "aliased_import":
                    # `import pkg.mod as alias` wraps the dotted name in an
                    # aliased_import node; the module is its "name" field.
                    target = child.child_by_field_name("name")
                if target is not None and target.type == "dotted_name":
                    _add_import(_decode_node_text(target))

        for child in node.children:
            _visit_edges(child)

    _visit_edges(root)

    return symbols, call_edges, import_edges
475
+
476
+
477
+ # ---------------------------------------------------------------------------
478
+ # File discovery
479
+ # ---------------------------------------------------------------------------
480
+
481
+
482
def discover_files(root: Path | str, extensions: set[str] | None = None) -> list[Path]:
    """Walk root recursively, returning files with supported extensions.

    Args:
        root: Directory to search. A plain string path is accepted and
            converted to a ``Path``.
        extensions: File suffixes (including the leading dot) to keep.
            Defaults to the keys of ``_EXTENSION_MAP``.

    Returns:
        Sorted list of matching file paths, each prefixed with *root*.
        Files located beneath a directory named in ``_SKIP_DIRS`` are
        excluded.
    """
    root = Path(root)
    wanted = set(_EXTENSION_MAP) if extensions is None else extensions

    results: list[Path] = []
    for path in root.rglob("*"):
        # Cheapest test first: most entries fail the suffix filter.
        if path.suffix not in wanted:
            continue
        # Only directory components *below* root are checked, so a project
        # that itself lives under e.g. ".../build/" or ".../env/" is still
        # discovered.
        if any(part in _SKIP_DIRS for part in path.relative_to(root).parts[:-1]):
            continue
        if path.is_file():
            results.append(path)
    return sorted(results)
499
+
500
+
501
+ # ---------------------------------------------------------------------------
502
+ # Import map construction and call resolution
503
+ # ---------------------------------------------------------------------------
504
+
505
+
506
@dataclass
class ImportMap:
    """Per-file import resolution table.

    Maps local names to their source qualified names so call edges
    can be resolved to the correct target symbol.
    """
    # Maps local name (the alias, when one is used) -> source qualified_name
    named_imports: dict[str, str] = field(default_factory=dict)
    # Maps module path -> list of re-exported names (barrel files)
    # NOTE(review): nothing in this module populates or reads this field.
    namespace_imports: dict[str, list[str]] = field(default_factory=dict)
517
+
518
+
519
+ def _resolve_ts_import_path(
520
+ importer_rel: str,
521
+ import_path: str,
522
+ known_files: set[str],
523
+ file_stem_index: dict[str, str],
524
+ ) -> str | None:
525
+ """Resolve a TypeScript relative import to a workspace-relative file path."""
526
+ if not import_path.startswith("."):
527
+ return None # external package
528
+
529
+ importer_dir = str(Path(importer_rel).parent)
530
+ if importer_dir == ".":
531
+ resolved_base = import_path
532
+ else:
533
+ resolved_base = str(Path(importer_dir) / import_path)
534
+
535
+ resolved_base = str(Path(resolved_base))
536
+
537
+ # Try exact match
538
+ if resolved_base in file_stem_index:
539
+ return file_stem_index[resolved_base]
540
+
541
+ # Try extension guessing
542
+ for suffix in (".ts", ".tsx", "/index.ts", "/index.tsx"):
543
+ candidate = resolved_base + suffix
544
+ if candidate in known_files:
545
+ return candidate
546
+
547
+ return None
548
+
549
+
550
def _build_ts_import_map(
    source: bytes,
    rel_path: str,
    is_tsx: bool,
    known_files: set[str],
    file_stem_index: dict[str, str],
    all_symbols_by_file: dict[str, list[Symbol]],
    _visited: set[str] | None = None,
    _depth: int = 0,
) -> ImportMap:
    """Build the named-import table for one TypeScript file.

    Every top-level ``import { A, B as C } from "./mod"`` whose specifier
    resolves to a known workspace file contributes
    ``local name -> "{resolved_file}::{original name}"``.

    ``_visited`` and ``_depth`` guard against cycles and runaway depth
    (cap: 5): a file already in ``_visited``, or a depth above the cap,
    yields an empty map. ``rel_path`` is added to ``_visited`` in place.

    NOTE(review): this body never recurses into re-exporting (barrel) files
    and does not read ``all_symbols_by_file``; default and namespace imports
    are not recorded.
    """
    MAX_BARREL_DEPTH = 5
    import_map = ImportMap()

    seen = set() if _visited is None else _visited
    too_deep = _depth > MAX_BARREL_DEPTH
    if too_deep or rel_path in seen:
        if too_deep:
            logger.warning("Barrel file depth cap hit at %s (depth %d)", rel_path, _depth)
        return import_map
    seen.add(rel_path)

    parser = _get_ts_parser("typescript", is_tsx=is_tsx)
    tree = parser.parse(source)

    for statement in tree.root_node.children:
        if statement.type != "import_statement":
            continue

        source_node = statement.child_by_field_name("source")
        if source_node is None:
            continue
        quoted_spec = _decode_node_text(source_node)
        if quoted_spec is None:
            continue

        target_file = _resolve_ts_import_path(
            rel_path, quoted_spec.strip("\"'"), known_files, file_stem_index
        )
        if target_file is None:
            continue

        # import_clause -> named_imports -> import_specifier
        specifiers = (
            spec
            for clause in statement.children
            if clause.type == "import_clause"
            for group in clause.children
            if group.type == "named_imports"
            for spec in group.children
            if spec.type == "import_specifier"
        )
        for spec in specifiers:
            name_node = spec.child_by_field_name("name")
            if name_node is None:
                continue
            alias_node = spec.child_by_field_name("alias")
            exported_name = _decode_node_text(name_node)
            # With `A as B`, calls in this file use B but the target is A.
            local_name = exported_name if alias_node is None else _decode_node_text(alias_node)
            if exported_name and local_name:
                import_map.named_imports[local_name] = f"{target_file}::{exported_name}"

    return import_map
619
+
620
+
621
def _build_py_import_map(
    source: bytes,
    rel_path: str,
) -> ImportMap:
    """Build the named-import table for one Python file.

    Only top-level ``from ... import ...`` statements are inspected:
    - from .module import name   (relative; resolved against rel_path)
    - from module import name    (absolute)
    - from module import name as alias

    Each imported name maps ``local name -> "{module}::{original name}"``,
    where ``module`` is a dotted module path. Plain ``import module``
    statements add nothing to the map.

    NOTE(review): targets use dotted module paths, whereas symbols produced
    by the parsers are keyed by file path — confirm how consumers match them.
    """
    import_map = ImportMap()

    tree = _get_ts_parser("python").parse(source)

    for statement in tree.root_node.children:
        if statement.type != "import_from_statement":
            continue

        module_node = statement.child_by_field_name("module_name")
        if module_node is None:
            continue
        module_name = _decode_node_text(module_node)
        if module_name is None:
            continue

        if module_name.startswith("."):
            # One leading dot is the importing file's own package; each
            # extra dot climbs one directory.
            remainder = module_name.lstrip(".")
            level = len(module_name) - len(remainder)
            package_dir = Path(rel_path).parent
            for _ in range(level - 1):
                package_dir = package_dir.parent
            if remainder:
                base_path = str(package_dir / remainder.replace(".", "/"))
            else:
                base_path = str(package_dir)
            module_name = base_path.replace("/", ".")

        for item in statement.children:
            if item.type == "dotted_name" and item != module_node:
                imported = _decode_node_text(item)
                if imported:
                    import_map.named_imports[imported] = f"{module_name}::{imported}"
            elif item.type == "aliased_import":
                name_node = item.child_by_field_name("name")
                if name_node is None:
                    continue
                alias_node = item.child_by_field_name("alias")
                original = _decode_node_text(name_node)
                local = original if alias_node is None else _decode_node_text(alias_node)
                if original and local:
                    import_map.named_imports[local] = f"{module_name}::{original}"

    return import_map
680
+
681
+
682
def resolve_call(
    callee_name: str,
    import_map: ImportMap,
    same_file_symbols: dict[str, str],
    global_name_index: dict[str, str],
) -> str | None:
    """Map a callee name to a qualified name, or None if it is unknown.

    Lookup order (first hit wins):
        1. the calling file's named imports,
        2. symbols defined in the calling file,
        3. the workspace-wide bare-name index.
    """
    for scope in (import_map.named_imports, same_file_symbols):
        if callee_name in scope:
            return scope[callee_name]
    return global_name_index.get(callee_name)
703
+
704
+
705
+ # ---------------------------------------------------------------------------
706
+ # Single-file parsing
707
+ # ---------------------------------------------------------------------------
708
+
709
+
710
def parse_file(
    path: Path,
    rel_path: str,
    language: str,
) -> tuple[list[Symbol], list[Edge], list[Edge]]:
    """Read one source file and extract its symbols and edges.

    Args:
        path: Location of the source file on disk.
        rel_path: Workspace-relative path (used in qualified names).
        language: "typescript" or "python".

    Returns:
        Tuple of (symbols, call_edges, import_edges).

    Raises:
        ValueError: If *language* is not supported. The file is read before
            the language is checked.
    """
    source = path.read_bytes()

    if language == "python":
        return _parse_python(source, rel_path)
    if language == "typescript":
        # The .tsx suffix selects the TSX grammar.
        return _parse_typescript(source, rel_path, path.suffix == ".tsx")
    raise ValueError(f"Unsupported language: {language}")
734
+
735
+
736
+ # ---------------------------------------------------------------------------
737
+ # Multi-file parsing → SyncPayload
738
+ # ---------------------------------------------------------------------------
739
+
740
+
741
def parse_files(
    paths: list[Path],
    workspace_root: Path,
) -> dict[str, Any]:
    """Parse multiple files and return a dict matching the SyncPayload schema.

    Uses import-scoped call resolution when possible, falling back to
    same-file and then bare-name matching. Call edges whose callee cannot be
    resolved are dropped.

    Args:
        paths: Files to parse. Entries that are not existing files, or whose
            extension is not in ``_EXTENSION_MAP``, are skipped.
        workspace_root: Directory every path is made relative to; the
            relative path is the prefix of all qualified names.

    Returns:
        ``{"symbols": [...], "edges": [...], "deleted_files": []}`` where
        edges are the resolved call edges followed by the import edges.

    Raises:
        ValueError: If a path in *paths* is not located under
            *workspace_root*.
    """
    all_symbols: list[Symbol] = []
    all_call_edges: list[Edge] = []
    all_import_edges: list[Edge] = []
    symbols_by_file: dict[str, list[Symbol]] = {}
    file_sources: dict[str, bytes] = {}

    known_files: set[str] = set()
    file_stem_index: dict[str, str] = {}

    # Phase 1: index and parse all files (bare-name resolution)
    for abs_path in paths:
        if not abs_path.is_file():
            continue

        rel_path = str(abs_path.relative_to(workspace_root))
        known_files.add(rel_path)
        # Index both "dir/mod" and "dir/mod.ts" so imports resolve with or
        # without an explicit extension.
        stem = str(abs_path.with_suffix("").relative_to(workspace_root))
        file_stem_index[stem] = rel_path
        file_stem_index[rel_path] = rel_path

        language = _EXTENSION_MAP.get(abs_path.suffix)
        if language is None:
            continue

        try:
            source = abs_path.read_bytes()
            file_sources[rel_path] = source
            syms, calls, imports = parse_file(abs_path, rel_path, language)
            all_symbols.extend(syms)
            all_call_edges.extend(calls)
            all_import_edges.extend(imports)
            symbols_by_file[rel_path] = syms
        except Exception:
            # One unreadable or unparsable file must not abort the whole run.
            logger.exception("Failed to parse %s", rel_path)
            continue

    # Phase 2: Build import maps and re-resolve call edges
    global_name_index: dict[str, str] = {s.name: s.qualified_name for s in all_symbols}

    # Both tables depend only on the source file, so build each once per
    # file rather than once per edge (the import map needs a full re-parse).
    import_maps: dict[str, ImportMap] = {}
    same_file_indexes: dict[str, dict[str, str]] = {}

    def _import_map_for(src_file: str) -> ImportMap:
        """Return the (cached) import map for *src_file*; empty on failure."""
        cached = import_maps.get(src_file)
        if cached is not None:
            return cached

        import_map = ImportMap()
        source_bytes = file_sources.get(src_file)
        language = _EXTENSION_MAP.get(Path(src_file).suffix, "")
        try:
            if source_bytes and language == "typescript":
                import_map = _build_ts_import_map(
                    source_bytes, src_file, src_file.endswith(".tsx"),
                    known_files, file_stem_index, symbols_by_file,
                )
            elif source_bytes and language == "python":
                import_map = _build_py_import_map(source_bytes, src_file)
        except Exception:
            # Best effort: fall back to an empty map, but keep the traceback
            # available at debug level.
            logger.debug("Failed to build import map for %s", src_file, exc_info=True)

        import_maps[src_file] = import_map
        return import_map

    def _same_file_index_for(src_file: str) -> dict[str, str]:
        """Return the (cached) name -> qualified-name table for *src_file*."""
        index = same_file_indexes.get(src_file)
        if index is None:
            index = {s.name: s.qualified_name for s in symbols_by_file.get(src_file, [])}
            same_file_indexes[src_file] = index
        return index

    resolved_edges: list[Edge] = []
    for edge in all_call_edges:
        # Extract file path from source qualified name (format: "file::name")
        src_parts = edge.source_qualified_name.split("::", 1)
        if len(src_parts) < 2:
            resolved_edges.append(edge)
            continue

        src_file = src_parts[0]
        # The target may be "file::name" or already a bare callee name.
        callee_parts = edge.target_qualified_name.split("::", 1)
        callee_name = callee_parts[1] if len(callee_parts) == 2 else edge.target_qualified_name

        resolved_target = resolve_call(
            callee_name,
            _import_map_for(src_file),
            _same_file_index_for(src_file),
            global_name_index,
        )
        if resolved_target:
            resolved_edges.append(Edge(
                source_qualified_name=edge.source_qualified_name,
                target_qualified_name=resolved_target,
                edge_type=edge.edge_type,
                metadata=edge.metadata,
            ))
        # else: drop unresolvable edge

    # Build payload
    symbols_payload = [
        {
            "qualified_name": s.qualified_name,
            "name": s.name,
            "file_path": s.file_path,
            "symbol_type": s.symbol_type,
            "line_start": s.line_start,
            "line_end": s.line_end,
            "language": s.language,
            "metadata": s.metadata,
        }
        for s in all_symbols
    ]

    edges_payload = [
        {
            "source_qualified_name": e.source_qualified_name,
            "target_qualified_name": e.target_qualified_name,
            "edge_type": e.edge_type,
            "metadata": e.metadata,
        }
        for e in resolved_edges + all_import_edges
    ]

    return {
        "symbols": symbols_payload,
        "edges": edges_payload,
        "deleted_files": [],
    }
File without changes
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "neuroloom-codeweaver"
7
+ version = "0.1.0"
8
+ description = "Client-side Tree-sitter parser for Neuroloom code graph extraction"
9
+ license = { text = "MIT" }
10
+ readme = "README.md"
11
+ requires-python = ">=3.12"
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Developers",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Topic :: Software Development :: Libraries",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ ]
21
+ dependencies = [
22
+ "tree-sitter>=0.25.0",
23
+ "tree-sitter-typescript>=0.23.0",
24
+ "tree-sitter-python>=0.23.0",
25
+ ]
26
+
27
+ [project.urls]
28
+ Homepage = "https://neuroloom.dev"
29
+ Documentation = "https://neuroloom.dev/docs"
30
+ Source = "https://github.com/endless-galaxy-studios/neuroloom"
31
+
32
+ [project.optional-dependencies]
33
+ dev = [
34
+ "ruff>=0.9.0",
35
+ "mypy>=1.14.0",
36
+ ]
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["codeweaver"]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+ target-version = "py312"
44
+
45
+ [tool.mypy]
46
+ python_version = "3.12"
47
+ strict = true
48
+ ignore_missing_imports = true