codegraph-nav 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. codegraph_nav/__init__.py +194 -0
  2. codegraph_nav/ast_grep_analyzer.py +448 -0
  3. codegraph_nav/cli.py +223 -0
  4. codegraph_nav/code_navigator.py +1328 -0
  5. codegraph_nav/code_search.py +1009 -0
  6. codegraph_nav/colors.py +209 -0
  7. codegraph_nav/completions.py +354 -0
  8. codegraph_nav/dart_analyzer.py +301 -0
  9. codegraph_nav/dependency_graph.py +814 -0
  10. codegraph_nav/domain/__init__.py +20 -0
  11. codegraph_nav/domain/routes.py +337 -0
  12. codegraph_nav/domain/schemas.py +229 -0
  13. codegraph_nav/domain/tags.py +87 -0
  14. codegraph_nav/exporters.py +563 -0
  15. codegraph_nav/go_analyzer.py +273 -0
  16. codegraph_nav/graph/__init__.py +72 -0
  17. codegraph_nav/graph/builder.py +409 -0
  18. codegraph_nav/graph/communities.py +402 -0
  19. codegraph_nav/graph/flows.py +311 -0
  20. codegraph_nav/graph/query.py +380 -0
  21. codegraph_nav/graph/schema.py +266 -0
  22. codegraph_nav/graph/search.py +257 -0
  23. codegraph_nav/graph/store.py +517 -0
  24. codegraph_nav/hints.py +195 -0
  25. codegraph_nav/import_resolver.py +891 -0
  26. codegraph_nav/js_ts_analyzer.py +564 -0
  27. codegraph_nav/line_reader.py +664 -0
  28. codegraph_nav/mcp/__init__.py +39 -0
  29. codegraph_nav/mcp/__main__.py +5 -0
  30. codegraph_nav/mcp/server.py +2228 -0
  31. codegraph_nav/py.typed +2 -0
  32. codegraph_nav/ruby_analyzer.py +259 -0
  33. codegraph_nav/rust_analyzer.py +379 -0
  34. codegraph_nav/token_efficient_renderer.py +743 -0
  35. codegraph_nav/watcher.py +382 -0
  36. codegraph_nav-0.1.0.dist-info/METADATA +487 -0
  37. codegraph_nav-0.1.0.dist-info/RECORD +41 -0
  38. codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
  39. codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
  40. codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1328 @@
1
+ #!/usr/bin/env python3
2
+ """Code Mapper - Generates a structural map/graph of a codebase for token-efficient navigation.
3
+
4
+ This module creates a lightweight index of functions, classes, methods, and their
5
+ relationships within a codebase. The generated index can be used for quick symbol
6
+ lookup without reading entire files.
7
+
8
+ Example:
9
+ Command line usage:
10
+ $ codegraph-nav scan /path/to/project -o .codegraph.json
11
+
12
+ Python API usage:
13
+ >>> mapper = CodeNavigator('/path/to/project')
14
+ >>> code_map = mapper.scan()
15
+ >>> print(code_map['stats'])
16
+ {'files_processed': 142, 'symbols_found': 1847, 'errors': 0}
17
+
18
+ Attributes:
19
+ LANGUAGE_EXTENSIONS: Dict mapping language names to file extensions.
20
+ DEFAULT_IGNORE_PATTERNS: List of patterns to ignore when scanning.
21
+ """
22
+
23
+ import argparse
24
+ import ast
25
+ import fnmatch
26
+ import json
27
+ import os
28
+ import re
29
+ import subprocess
30
+ import sys
31
+ import time
32
+ from dataclasses import dataclass
33
+ from datetime import datetime
34
+ from pathlib import Path
35
+ from typing import Any, Protocol
36
+
37
+ from .colors import get_colors
38
+
39
+
40
class _Analyzer(Protocol):
    """Duck-typed interface shared by every language analyzer.

    Any object exposing a zero-argument ``analyze`` method that returns the
    extracted symbols satisfies this protocol; no inheritance is required.
    """

    def analyze(self) -> list["Symbol"]:
        """Return the symbols extracted from the analyzer's source."""
        ...
44
+
45
+
46
# Version string of this module.
__version__ = "0.1.0"

# Supported languages and their extensions.
# Keys are language identifiers; values list the lowercase file suffixes
# (leading dot included) that map to that language.
LANGUAGE_EXTENSIONS = {
    "python": [".py"],
    "javascript": [".js", ".jsx", ".mjs"],
    "typescript": [".ts", ".tsx"],
    "java": [".java"],
    "go": [".go"],
    "rust": [".rs"],
    "c": [".c", ".h"],
    "cpp": [".cpp", ".hpp", ".cc", ".hh", ".cxx"],
    "ruby": [".rb"],
    "php": [".php"],
    "dart": [".dart"],
}
62
+
63
# Patterns skipped by default when scanning a codebase. Entries are either
# plain names or fnmatch-style globs; the security-related groups exist so
# that files likely to contain secrets are never read into the index.
DEFAULT_IGNORE_PATTERNS = [
    # Build artifacts and dependencies
    "node_modules",
    "__pycache__",
    "venv",
    "env",
    "dist",
    "build",
    ".next",
    "coverage",
    ".nyc_output",
    "*.min.js",
    "*.bundle.js",
    ".tox",
    "eggs",
    "*.egg-info",
    ".pytest_cache",
    "vendor",
    "target",
    "bin",
    "obj",
    # Dart/Flutter build artifacts and generated code
    ".dart_tool",
    "*.g.dart",
    "*.freezed.dart",
    "*.gr.dart",
    # Version control
    ".git",
    ".svn",
    ".hg",
    # IDE settings
    ".idea",
    ".vscode",
    # Environment files - ALL variants (SECURITY: prevents exposure of secrets)
    ".env",
    ".env.*",
    ".env.local",
    ".env.*.local",
    ".env.production*",
    ".env.development*",
    ".envrc",
    "*.env",
    # Credentials and secrets (SECURITY)
    "secrets*",
    "*secret*",
    "*secrets*",
    "*credential*",
    "*credentials*",
    ".aws",
    ".gcp",
    ".ssh",
    ".gnupg",
    # Keys and certificates (SECURITY)
    "*.pem",
    "*.key",
    "*.p8",
    "*.p12",
    "*.pfx",
    "id_rsa*",
    "id_ed25519*",
    "id_ecdsa*",
    "*.crt",
    "*.cer",
    # Config files with potential secrets (SECURITY)
    ".npmrc",
    ".pypirc",
    ".netrc",
    "config/database.yml",
    "config/secrets.yml",
    # API keys and tokens
    "*apikey*",
    "*api_key*",
    "*token*",
]
137
+
138
+
139
+ @dataclass
140
+ class Symbol:
141
+ """Represents a code symbol (function, class, method, etc.).
142
+
143
+ Attributes:
144
+ name: The symbol's name (e.g., 'process_payment').
145
+ type: The symbol type ('function', 'class', 'method', 'variable', 'import').
146
+ file_path: Relative path to the file containing the symbol.
147
+ line_start: Starting line number (1-indexed).
148
+ line_end: Ending line number (1-indexed, inclusive).
149
+ signature: Function/class signature (e.g., 'def foo(x: int) -> str').
150
+ docstring: First few lines of docstring, if present.
151
+ parent: For methods, the containing class name.
152
+ dependencies: List of symbols this symbol calls/uses.
153
+ decorators: List of decorator names applied to this symbol.
154
+
155
+ Example:
156
+ >>> symbol = Symbol(
157
+ ... name='process_payment',
158
+ ... type='function',
159
+ ... file_path='src/billing.py',
160
+ ... line_start=45,
161
+ ... line_end=89,
162
+ ... signature='def process_payment(user_id: int, amount: Decimal)'
163
+ ... )
164
+ """
165
+
166
+ name: str
167
+ type: str
168
+ file_path: str
169
+ line_start: int
170
+ line_end: int
171
+ signature: str | None = None
172
+ docstring: str | None = None
173
+ parent: str | None = None
174
+ dependencies: list[str] | None = None
175
+ decorators: list[str] | None = None
176
+ truncated: bool = False # True if symbol exceeded max line limit during analysis
177
+
178
+ def __post_init__(self):
179
+ """Initialize mutable default values."""
180
+ if self.dependencies is None:
181
+ self.dependencies = []
182
+ if self.decorators is None:
183
+ self.decorators = []
184
+
185
+
186
+ class PythonAnalyzer(ast.NodeVisitor):
187
+ """Analyzes Python files using AST for accurate symbol extraction.
188
+
189
+ This analyzer provides the most accurate symbol detection for Python files,
190
+ using Python's built-in AST module to parse the code structure.
191
+
192
+ Attributes:
193
+ file_path: Path to the file being analyzed.
194
+ source: Source code content.
195
+ lines: List of source lines.
196
+ symbols: Extracted symbols.
197
+ current_class: Name of class currently being visited (for method detection).
198
+ imports: List of imported modules/names.
199
+
200
+ Example:
201
+ >>> source = '''
202
+ ... def greet(name: str) -> str:
203
+ ... \"\"\"Say hello.\"\"\"
204
+ ... return f"Hello, {name}"
205
+ ... '''
206
+ >>> analyzer = PythonAnalyzer('example.py', source)
207
+ >>> symbols = analyzer.analyze()
208
+ >>> print(symbols[0].name)
209
+ 'greet'
210
+ """
211
+
212
+ def __init__(self, file_path: str, source: str):
213
+ """Initialize the Python analyzer.
214
+
215
+ Args:
216
+ file_path: Relative path to the file.
217
+ source: Source code content.
218
+ """
219
+ self.file_path = file_path
220
+ self.source = source
221
+ self.lines = source.split("\n")
222
+ self.symbols: list[Symbol] = []
223
+ self.current_class: str | None = None
224
+ self.imports: list[str] = []
225
+
226
+ def get_line_end(self, node) -> int:
227
+ """Get the end line of an AST node.
228
+
229
+ Args:
230
+ node: An AST node.
231
+
232
+ Returns:
233
+ The ending line number of the node.
234
+ """
235
+ if hasattr(node, "end_lineno") and node.end_lineno:
236
+ return int(node.end_lineno)
237
+ if hasattr(node, "body") and node.body:
238
+ last_node = node.body[-1]
239
+ return self.get_line_end(last_node)
240
+ return int(node.lineno)
241
+
242
+ def get_signature(self, node) -> str:
243
+ """Extract function/method signature from an AST node.
244
+
245
+ Args:
246
+ node: A FunctionDef or AsyncFunctionDef AST node.
247
+
248
+ Returns:
249
+ String representation of the function signature.
250
+
251
+ Example:
252
+ >>> # For 'async def foo(x: int) -> str:'
253
+ >>> signature = analyzer.get_signature(node)
254
+ >>> print(signature)
255
+ 'async def foo(x: int) -> str'
256
+ """
257
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
258
+ args = []
259
+ for arg in node.args.args:
260
+ arg_str = arg.arg
261
+ if arg.annotation:
262
+ try:
263
+ arg_str += f": {ast.unparse(arg.annotation)}"
264
+ except (TypeError, AttributeError, RecursionError, ValueError):
265
+ # ast.unparse can fail on malformed/complex AST nodes
266
+ pass
267
+ args.append(arg_str)
268
+
269
+ returns = ""
270
+ if node.returns:
271
+ try:
272
+ returns = f" -> {ast.unparse(node.returns)}"
273
+ except (TypeError, AttributeError, RecursionError, ValueError):
274
+ # ast.unparse can fail on malformed/complex AST nodes
275
+ pass
276
+
277
+ prefix = "async " if isinstance(node, ast.AsyncFunctionDef) else ""
278
+ return f"{prefix}def {node.name}({', '.join(args)}){returns}"
279
+ return ""
280
+
281
+ def get_decorators(self, node) -> list[str]:
282
+ """Extract decorator names from an AST node.
283
+
284
+ Args:
285
+ node: An AST node with decorator_list attribute.
286
+
287
+ Returns:
288
+ List of decorator name strings.
289
+ """
290
+ decorators = []
291
+ for dec in node.decorator_list:
292
+ try:
293
+ decorators.append(ast.unparse(dec))
294
+ except (TypeError, AttributeError, RecursionError, ValueError):
295
+ # Fallback: try to get simple decorator name
296
+ if isinstance(dec, ast.Name):
297
+ decorators.append(dec.id)
298
+ return decorators
299
+
300
+ def get_docstring(self, node) -> str | None:
301
+ """Extract docstring from an AST node, truncated for efficiency.
302
+
303
+ Args:
304
+ node: An AST node that may have a docstring.
305
+
306
+ Returns:
307
+ First 3 lines of the docstring, or None if no docstring.
308
+ """
309
+ doc = ast.get_docstring(node)
310
+ if doc:
311
+ lines = doc.split("\n")
312
+ if len(lines) > 3:
313
+ return "\n".join(lines[:3]) + "..."
314
+ return doc
315
+ return None
316
+
317
+ def visit_Import(self, node):
318
+ """Visit an import statement."""
319
+ for alias in node.names:
320
+ self.imports.append(alias.name)
321
+ self.generic_visit(node)
322
+
323
+ def visit_ImportFrom(self, node):
324
+ """Visit a from...import statement."""
325
+ module = node.module or ""
326
+ for alias in node.names:
327
+ self.imports.append(f"{module}.{alias.name}")
328
+ self.generic_visit(node)
329
+
330
+ def visit_ClassDef(self, node):
331
+ """Visit a class definition."""
332
+ bases = []
333
+ for base in node.bases:
334
+ try:
335
+ bases.append(ast.unparse(base))
336
+ except (TypeError, AttributeError, RecursionError, ValueError):
337
+ # ast.unparse can fail on complex/malformed base class expressions
338
+ pass
339
+
340
+ signature = f"class {node.name}"
341
+ if bases:
342
+ signature += f"({', '.join(bases)})"
343
+
344
+ symbol = Symbol(
345
+ name=node.name,
346
+ type="class",
347
+ file_path=self.file_path,
348
+ line_start=node.lineno,
349
+ line_end=self.get_line_end(node),
350
+ signature=signature,
351
+ docstring=self.get_docstring(node),
352
+ decorators=self.get_decorators(node),
353
+ )
354
+ self.symbols.append(symbol)
355
+
356
+ old_class = self.current_class
357
+ self.current_class = node.name
358
+ self.generic_visit(node)
359
+ self.current_class = old_class
360
+
361
+ def visit_FunctionDef(self, node):
362
+ """Visit a function definition."""
363
+ self._visit_function(node)
364
+
365
+ def visit_AsyncFunctionDef(self, node):
366
+ """Visit an async function definition."""
367
+ self._visit_function(node)
368
+
369
+ def _visit_function(self, node):
370
+ """Process a function or async function definition.
371
+
372
+ Args:
373
+ node: A FunctionDef or AsyncFunctionDef AST node.
374
+ """
375
+ symbol_type = "method" if self.current_class else "function"
376
+
377
+ calls = []
378
+ for child in ast.walk(node):
379
+ if isinstance(child, ast.Call):
380
+ if isinstance(child.func, ast.Name):
381
+ calls.append(child.func.id)
382
+ elif isinstance(child.func, ast.Attribute):
383
+ calls.append(child.func.attr)
384
+
385
+ symbol = Symbol(
386
+ name=node.name,
387
+ type=symbol_type,
388
+ file_path=self.file_path,
389
+ line_start=node.lineno,
390
+ line_end=self.get_line_end(node),
391
+ signature=self.get_signature(node),
392
+ docstring=self.get_docstring(node),
393
+ parent=self.current_class,
394
+ dependencies=list(set(calls)),
395
+ decorators=self.get_decorators(node),
396
+ )
397
+ self.symbols.append(symbol)
398
+ self.generic_visit(node)
399
+
400
+ def analyze(self) -> list[Symbol]:
401
+ """Parse and analyze the file.
402
+
403
+ Returns:
404
+ List of Symbol objects found in the file.
405
+
406
+ Raises:
407
+ SyntaxError: If the file has invalid Python syntax (caught and logged).
408
+ """
409
+ try:
410
+ tree = ast.parse(self.source)
411
+ self.visit(tree)
412
+ except SyntaxError as e:
413
+ print(f"Syntax error in {self.file_path}: {e}", file=sys.stderr)
414
+ return self.symbols
415
+
416
+
417
class GenericAnalyzer:
    """Regex-based analyzer for non-Python languages.

    Symbols are located with the per-language regular expressions in
    ``PATTERNS`` (JavaScript, TypeScript, Java, Go, Ruby, Rust and Dart).
    A language without an entry in ``PATTERNS`` yields no symbols. This is
    less accurate than AST analysis but works across multiple languages.

    Attributes:
        PATTERNS: Dict of regex patterns for each supported language.
        file_path: Path to the file being analyzed.
        source: Source code content.
        language: The programming language of the file.
        lines: ``source`` split on newlines.

    Example:
        >>> source = 'function greet(name) { return "Hello, " + name; }'
        >>> analyzer = GenericAnalyzer('example.js', source, 'javascript')
        >>> symbols = analyzer.analyze()
    """

    PATTERNS = {
        "javascript": {
            "function": r"(?:async\s+)?function\s+(\w+)\s*\([^)]*\)",
            "arrow": r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>",
            "class": r"class\s+(\w+)(?:\s+extends\s+\w+)?",
            "method": r"(?:async\s+)?(\w+)\s*\([^)]*\)\s*{",
        },
        "typescript": {
            "function": r"(?:async\s+)?function\s+(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)",
            "interface": r"interface\s+(\w+)",
            "type": r"type\s+(\w+)\s*=",
            "class": r"class\s+(\w+)(?:\s+extends\s+\w+)?(?:\s+implements\s+\w+)?",
        },
        "java": {
            "class": r"(?:public|private|protected)?\s*class\s+(\w+)",
            "interface": r"interface\s+(\w+)",
            "method": r"(?:public|private|protected)?\s*(?:static\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\([^)]*\)",
        },
        "go": {
            "function": r"func\s+(\w+)\s*(?:\[[^\]]*\])?\s*\(",
            "method": r"func\s+\([^)]+\)\s+(\w+)\s*(?:\[[^\]]*\])?\s*\(",
            "struct": r"type\s+(\w+)\s+struct",
            "interface": r"type\s+(\w+)\s+interface",
            "type_alias": r"type\s+(\w+)\s+(?!struct\b|interface\b)\w+",
        },
        "ruby": {
            "function": r"^[ \t]*def\s+(?!self\.)(\w+[!?=]?)",
            "class": r"^[ \t]*class\s+([A-Z]\w*)",
            "module": r"^[ \t]*module\s+([A-Z]\w*)",
        },
        "rust": {
            "function": r"(?:pub\s+)?(?:async\s+)?fn\s+(\w+)",
            "struct": r"(?:pub\s+)?struct\s+(\w+)",
            "impl": r"impl(?:<[^>]*>)?\s+(\w+)",
            "trait": r"(?:pub\s+)?trait\s+(\w+)",
            "enum": r"(?:pub\s+)?enum\s+(\w+)",
        },
        "dart": {
            "class": r"(?:abstract\s+)?class\s+(\w+)",
            "mixin": r"^[ \t]*mixin\s+(\w+)",
            "enum": r"enum\s+(\w+)\s*\{",
            "extension": r"extension\s+(\w+)\s+on\s+\w+",
            "function": r"^[ \t]*(?!(?:if|for|while|switch|catch|return|do|else|throw|new|await|assert|yield)\b)(?:Future(?:<[^>]+>)?|void|String|int|double|bool|num|dynamic|Widget|List(?:<[^>]+>)?|Map(?:<[^>]+>)?|Set(?:<[^>]+>)?|Iterable(?:<[^>]+>)?|Stream(?:<[^>]+>)?|[A-Z]\w*\??)\s+(\w+)\s*\([^)]*\)\s*(?:async\s*\*?\s*)?\{",
        },
    }

    # Maximum lines to scan for a symbol's end before giving up
    MAX_SYMBOL_LINES = 500

    # Languages that use 'end' keyword instead of braces for block termination
    KEYWORD_END_LANGUAGES = {"ruby"}

    # Keywords that open a new block in end-based languages
    _END_OPENERS = (
        "def ",
        "class ",
        "module ",
        "do",
        "if ",
        "unless ",
        "while ",
        "until ",
        "for ",
        "begin",
        "case ",
    )

    def __init__(self, file_path: str, source: str, language: str):
        """Initialize the generic analyzer.

        Args:
            file_path: Relative path to the file.
            source: Source code content.
            language: Programming language identifier.
        """
        self.file_path = file_path
        self.source = source
        self.language = language
        self.lines = source.split("\n")

    def _find_keyword_end(self, line_num: int) -> tuple[int, bool]:
        """Find the line of the ``end`` that closes the block opened at ``line_num``.

        Block depth starts at 1 for the symbol's own opener and is tracked
        over the following lines. A line opens a block when it starts with
        one of the ``_END_OPENERS`` keywords as a whole word, or when it ends
        with ``do`` / ``do |args|``. One-line blocks that already end in
        ``end`` do not change the depth. Comment lines are ignored.

        Args:
            line_num: 1-indexed line on which the symbol starts.

        Returns:
            ``(line_end, truncated)``. ``line_end`` falls back to ``line_num``
            when no closing ``end`` is found; ``truncated`` is True when the
            scan stopped at the ``MAX_SYMBOL_LINES`` limit.
        """
        keywords = "|".join(re.escape(kw.strip()) for kw in self._END_OPENERS)
        # Whole-word match only: 'document.save' and 'class: "x"' are not openers.
        opener_re = re.compile(rf"(?:{keywords})(?=\s|$)")
        # 'items.each do |x|' opens a block although the keyword is trailing.
        trailing_do_re = re.compile(r"\bdo(?:\s*\|[^|]*\|)?\s*$")
        closes_itself_re = re.compile(r"\bend\s*$")

        depth = 1
        for i, line in enumerate(self.lines[line_num:], start=line_num + 1):
            stripped = line.strip()
            if not stripped.startswith("#"):
                opens = opener_re.match(stripped) or trailing_do_re.search(stripped)
                if opens and not closes_itself_re.search(stripped):
                    depth += 1
                if stripped == "end" or stripped.startswith(("end ", "end;")):
                    depth -= 1
                    if depth <= 0:
                        return i, False
            if i > line_num + self.MAX_SYMBOL_LINES:
                return i, True
        return line_num, False

    def _find_brace_end(self, line_num: int) -> tuple[int, bool]:
        """Find the line on which the brace block starting at ``line_num`` closes.

        Braces are counted textually, so braces inside strings or comments
        are counted as well.

        Args:
            line_num: 1-indexed line on which the symbol starts.

        Returns:
            ``(line_end, truncated)`` with the same meaning as in
            ``_find_keyword_end``.
        """
        balance = 0
        opened = False
        for i, line in enumerate(self.lines[line_num - 1 :], start=line_num):
            balance += line.count("{") - line.count("}")
            if "{" in line:
                opened = True
            if opened and balance <= 0:
                return i, False
            if i > line_num + self.MAX_SYMBOL_LINES:
                return i, True
        return line_num, False

    def analyze(self) -> list["Symbol"]:
        """Analyze the file using regex patterns.

        Every match of every pattern for ``self.language`` becomes one
        symbol. Its end line is found by keyword matching for languages in
        ``KEYWORD_END_LANGUAGES`` and by brace counting otherwise.

        Returns:
            List of Symbol objects found in the file (empty when the language
            has no entry in ``PATTERNS``).
        """
        symbols = []
        patterns = self.PATTERNS.get(self.language, {})
        keyword_based = self.language in self.KEYWORD_END_LANGUAGES

        for symbol_type, pattern in patterns.items():
            for match in re.finditer(pattern, self.source, re.MULTILINE):
                line_num = self.source[: match.start()].count("\n") + 1
                if keyword_based:
                    line_end, was_truncated = self._find_keyword_end(line_num)
                else:
                    line_end, was_truncated = self._find_brace_end(line_num)

                symbols.append(
                    Symbol(
                        name=match.group(1),
                        type=symbol_type,
                        file_path=self.file_path,
                        line_start=line_num,
                        line_end=line_end,
                        # Matched text, capped at 100 characters.
                        signature=match.group(0).strip()[:100],
                        truncated=was_truncated,
                    )
                )

        return symbols
587
+
588
+
589
+ class GitIntegration:
590
+ """Git integration utilities for the code mapper.
591
+
592
+ Provides methods to get git-tracked files, parse .gitignore,
593
+ and find changes since a specific commit.
594
+
595
+ Attributes:
596
+ root_path: Path to the git repository root.
597
+ available: Whether git is available and this is a git repo.
598
+
599
+ Example:
600
+ >>> git = GitIntegration('/path/to/repo')
601
+ >>> if git.available:
602
+ ... tracked_files = git.get_tracked_files()
603
+ ... print(f"Found {len(tracked_files)} tracked files")
604
+ """
605
+
606
+ def __init__(self, root_path: Path):
607
+ """Initialize git integration.
608
+
609
+ Args:
610
+ root_path: Path to the repository root.
611
+ """
612
+ self.root_path = root_path
613
+ self.available = self._check_git_available()
614
+
615
+ def _check_git_available(self) -> bool:
616
+ """Check if git is available and this is a git repository."""
617
+ try:
618
+ result = subprocess.run(
619
+ ["git", "rev-parse", "--git-dir"],
620
+ cwd=self.root_path,
621
+ capture_output=True,
622
+ text=True,
623
+ timeout=5,
624
+ )
625
+ return result.returncode == 0
626
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
627
+ return False
628
+
629
+ def get_tracked_files(self) -> set[str]:
630
+ """Get all files tracked by git.
631
+
632
+ Returns:
633
+ Set of relative file paths tracked by git.
634
+ """
635
+ if not self.available:
636
+ return set()
637
+
638
+ try:
639
+ result = subprocess.run(
640
+ ["git", "ls-files"],
641
+ cwd=self.root_path,
642
+ capture_output=True,
643
+ text=True,
644
+ timeout=30,
645
+ )
646
+ if result.returncode == 0:
647
+ return set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
648
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
649
+ pass
650
+ return set()
651
+
652
+ def get_gitignore_patterns(self) -> list[str]:
653
+ """Parse .gitignore and return patterns.
654
+
655
+ Returns:
656
+ List of gitignore patterns.
657
+ """
658
+ patterns = []
659
+ gitignore_path = self.root_path / ".gitignore"
660
+
661
+ if gitignore_path.exists():
662
+ try:
663
+ content = gitignore_path.read_text(encoding="utf-8")
664
+ for line in content.splitlines():
665
+ line = line.strip()
666
+ # Skip comments and empty lines
667
+ if line and not line.startswith("#"):
668
+ patterns.append(line)
669
+ except Exception:
670
+ pass
671
+
672
+ return patterns
673
+
674
+ def get_files_changed_since(self, commit: str) -> set[str]:
675
+ """Get files that changed since a specific commit.
676
+
677
+ Args:
678
+ commit: Safe git reference (hash, branch, tag, HEAD~N).
679
+
680
+ Returns:
681
+ Set of relative file paths that have changed.
682
+
683
+ Raises:
684
+ ValueError: If commit contains unsafe characters.
685
+ """
686
+ if not re.fullmatch(r"[a-zA-Z0-9][a-zA-Z0-9_.~^/@{}\-]*", commit):
687
+ raise ValueError(f"Invalid git reference: {commit}")
688
+
689
+ if not self.available:
690
+ return set()
691
+
692
+ try:
693
+ result = subprocess.run(
694
+ ["git", "diff", "--name-only", commit, "HEAD"],
695
+ cwd=self.root_path,
696
+ capture_output=True,
697
+ text=True,
698
+ timeout=30,
699
+ )
700
+ if result.returncode == 0:
701
+ return set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
702
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
703
+ pass
704
+ return set()
705
+
706
+ def get_uncommitted_changes(self) -> set[str]:
707
+ """Get files with uncommitted changes.
708
+
709
+ Returns:
710
+ Set of relative file paths with uncommitted changes.
711
+ """
712
+ if not self.available:
713
+ return set()
714
+
715
+ try:
716
+ # Get both staged and unstaged changes
717
+ result = subprocess.run(
718
+ ["git", "status", "--porcelain"],
719
+ cwd=self.root_path,
720
+ capture_output=True,
721
+ text=True,
722
+ timeout=30,
723
+ )
724
+ if result.returncode == 0:
725
+ files = set()
726
+ for line in result.stdout.strip().split("\n"):
727
+ if line and len(line) > 3:
728
+ # Format: "XY filename" where XY is status
729
+ files.add(line[3:].strip())
730
+ return files
731
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
732
+ pass
733
+ return set()
734
+
735
+
736
+ class CodeNavigator:
737
+ """Main class for mapping a codebase to create a searchable index.
738
+
739
+ Scans a directory tree, analyzes source files, and generates a JSON index
740
+ containing all symbols, their locations, signatures, and dependencies.
741
+
742
+ Attributes:
743
+ root_path: Absolute path to the codebase root.
744
+ ignore_patterns: List of patterns to skip during scanning.
745
+ symbols: List of all discovered symbols.
746
+ file_hashes: Dict mapping file paths to content hashes.
747
+ stats: Dict with processing statistics.
748
+
749
+ Example:
750
+ >>> mapper = CodeNavigator('/path/to/project')
751
+ >>> code_map = mapper.scan()
752
+ >>> print(f"Found {code_map['stats']['symbols_found']} symbols")
753
+ Found 1847 symbols
754
+
755
+ >>> # Save to file
756
+ >>> import json
757
+ >>> with open('.codegraph.json', 'w') as f:
758
+ ... json.dump(code_map, f)
759
+ """
760
+
761
def __init__(
    self,
    root_path: str,
    ignore_patterns: list[str] | None = None,
    git_only: bool = False,
    use_gitignore: bool = False,
):
    """Initialize the code mapper.

    Args:
        root_path: Path to the root directory to scan.
        ignore_patterns: Additional patterns to ignore. Merged with
            DEFAULT_IGNORE_PATTERNS, so the default (security-related)
            ignores stay active when custom patterns are supplied.
        git_only: If True, only scan files tracked by git.
        use_gitignore: If True, also ignore patterns from .gitignore.
    """
    self.root_path = Path(root_path).resolve()
    # dict.fromkeys drops duplicates while keeping first-seen order.
    self.ignore_patterns = list(
        dict.fromkeys([*DEFAULT_IGNORE_PATTERNS, *(ignore_patterns or [])])
    )
    self.git_only = git_only
    self.use_gitignore = use_gitignore
    self.symbols: list[Symbol] = []
    self.file_hashes: dict[str, str] = {}
    self.stats = {"files_processed": 0, "symbols_found": 0, "errors": 0}
    self._existing_map: dict[str, Any] | None = None

    # Initialize git integration
    self._git = GitIntegration(self.root_path)
    # Stays None unless git_only mode is on and git is usable.
    self._git_tracked_files: set[str] | None = None

    # Add gitignore patterns if requested
    if self.use_gitignore and self._git.available:
        self.ignore_patterns.extend(self._git.get_gitignore_patterns())

    # Cache git tracked files if git_only mode
    if self.git_only and self._git.available:
        self._git_tracked_files = self._git.get_tracked_files()
797
+
798
def should_ignore(self, path: Path) -> bool:
    """Check if a path should be ignored during scanning.

    Patterns are matched against the path relative to ``root_path`` (the
    path's own components are used when it lies outside the root), so the
    location of the checkout never influences the result:

    * a pattern without ``/`` is an fnmatch glob tested against every path
      component, so ``build`` matches a ``build`` directory but not
      ``builder.py``;
    * a pattern containing ``/`` must match the whole relative path as a
      glob, or appear in it as a run of complete components.

    Leading and trailing slashes on a pattern are ignored.

    Args:
        path: Path to check.

    Returns:
        True if the path matches any ignore pattern.
    """
    path = Path(path)
    try:
        parts = path.relative_to(self.root_path).parts
    except ValueError:
        # Outside the root: fall back to the path's own components.
        parts = tuple(part for part in path.parts if part != path.anchor)
    relative = "/".join(parts)

    for raw_pattern in self.ignore_patterns:
        pattern = raw_pattern.strip("/")
        if not pattern:
            continue
        if "/" in pattern:
            # Wrapping both sides in "/" restricts the containment test to
            # whole path components.
            if f"/{pattern}/" in f"/{relative}/" or fnmatch.fnmatch(relative, pattern):
                return True
        elif any(fnmatch.fnmatch(part, pattern) for part in parts):
            return True

    return False
817
+
818
def _is_git_tracked(self, file_path: Path) -> bool:
    """Check if a file is tracked by git.

    Args:
        file_path: Absolute path to the file.

    Returns:
        True if the file is git-tracked, or if git_only mode is disabled or
        no tracked-file list is cached. False when the path is not located
        under ``root_path``.
    """
    if not self.git_only or self._git_tracked_files is None:
        return True

    try:
        relative = file_path.relative_to(self.root_path)
    except ValueError:
        return False
    # git reports paths with forward slashes on every platform, so compare
    # the POSIX form rather than the OS-native string.
    return relative.as_posix() in self._git_tracked_files
835
+
836
def get_language(self, file_path: Path) -> str | None:
    """Map a file's extension to a language identifier.

    The suffix is lower-cased before lookup, so the match is
    case-insensitive.

    Args:
        file_path: Path to the file.

    Returns:
        The first language in LANGUAGE_EXTENSIONS that lists the file's
        extension, or None if no language does.
    """
    suffix = file_path.suffix.lower()
    return next(
        (name for name, known in LANGUAGE_EXTENSIONS.items() if suffix in known),
        None,
    )
850
+
851
def hash_file(self, content: str) -> str:
    """Compute the content hash used to detect changed files.

    Delegates to the package-level ``compute_content_hash`` helper; the
    hash algorithm and digest length are defined there.

    Args:
        content: File content string.

    Returns:
        Hash string for ``content``.
    """
    # Imported at call time rather than at module import time.
    from . import compute_content_hash as content_digest

    return content_digest(content)
863
+
864
def analyze_file(self, file_path: Path) -> list[Symbol]:
    """Read one file, record its content hash and extract its symbols.

    The hash is stored in ``self.file_hashes`` under the root-relative path
    before the language is determined. Any exception raised while reading
    or analyzing is counted in ``self.stats["errors"]``, reported on stderr
    and turned into an empty result.

    Args:
        file_path: Path to the file to analyze.

    Returns:
        List of Symbol objects found in the file; empty for unrecognized
        file types and on error.
    """
    try:
        # Undecodable bytes are dropped rather than failing the whole file.
        with open(file_path, encoding="utf-8", errors="ignore") as handle:
            text = handle.read()

        relative = str(file_path.relative_to(self.root_path))
        self.file_hashes[relative] = self.hash_file(text)

        language = self.get_language(file_path)
        if not language:
            return []

        suffix = file_path.suffix.lower()
        analyzer: _Analyzer
        # Language-specific analyzers are imported only when first needed.
        if language == "python":
            analyzer = PythonAnalyzer(relative, text)
        elif language == "javascript":
            from .js_ts_analyzer import JavaScriptAnalyzer

            analyzer = JavaScriptAnalyzer(relative, text, is_jsx=suffix in (".jsx",))
        elif language == "typescript":
            from .js_ts_analyzer import TypeScriptAnalyzer

            analyzer = TypeScriptAnalyzer(relative, text, is_tsx=suffix in (".tsx",))
        elif language == "ruby":
            from .ruby_analyzer import RubyAnalyzer

            analyzer = RubyAnalyzer(relative, text)
        elif language == "go":
            from .go_analyzer import GoAnalyzer

            analyzer = GoAnalyzer(relative, text)
        elif language == "rust":
            from .rust_analyzer import RustAnalyzer

            analyzer = RustAnalyzer(relative, text)
        elif language == "dart":
            from .dart_analyzer import DartAnalyzer

            analyzer = DartAnalyzer(relative, text)
        else:
            # Every other recognized language uses the regex fallback.
            analyzer = GenericAnalyzer(relative, text, language)

        return analyzer.analyze()

    except Exception as e:
        self.stats["errors"] += 1
        print(f"Error analyzing {file_path}: {e}", file=sys.stderr)
        return []
921
+
922
+ # Maximum time allowed for a scan operation (seconds)
923
+ SCAN_TIMEOUT = 30
924
+
925
def scan(self) -> dict[str, Any]:
    """Walk the codebase, analyze every recognized file and build the map.

    Ignored directories are pruned from the walk. The elapsed time is
    checked once per visited directory; when it exceeds ``SCAN_TIMEOUT``
    the walk stops and ``stats['scan_timeout']`` is set to True.

    Returns:
        The dict produced by ``generate_map()`` for the symbols collected
        so far.

    Example:
        >>> mapper = CodeNavigator('/my/project')
        >>> result = mapper.scan()
    """
    label = "git-tracked files" if self.git_only else "codebase"
    print(f"Scanning {label} at: {self.root_path}", file=sys.stderr)

    if self.git_only:
        if not self._git.available:
            print("Warning: git not available, scanning all files", file=sys.stderr)
        elif self._git_tracked_files:
            print(f"  Git tracked files: {len(self._git_tracked_files)}", file=sys.stderr)

    started_at = time.monotonic()
    timed_out = False

    for root, dirs, files in os.walk(self.root_path):
        if time.monotonic() - started_at > self.SCAN_TIMEOUT:
            timed_out = True
            print("Warning: scan timed out, returning partial results", file=sys.stderr)
            break

        base = Path(root)
        # In-place assignment so os.walk does not descend into ignored dirs.
        dirs[:] = [name for name in dirs if not self.should_ignore(base / name)]

        for filename in files:
            candidate = base / filename
            # Skip ignored files, and untracked ones when git_only is enabled.
            if self.should_ignore(candidate) or not self._is_git_tracked(candidate):
                continue
            if not self.get_language(candidate):
                continue

            self.symbols.extend(self.analyze_file(candidate))
            self.stats["files_processed"] += 1

    self.stats["symbols_found"] = len(self.symbols)
    if timed_out:
        self.stats["scan_timeout"] = True
    return self.generate_map()
976
+
977
+ def get_current_file_hash(self, file_path: Path) -> str | None:
978
+ """Get the hash of a file's current content without full analysis.
979
+
980
+ Args:
981
+ file_path: Path to the file.
982
+
983
+ Returns:
984
+ Hash string, or None if file cannot be read.
985
+ """
986
+ try:
987
+ with open(file_path, encoding="utf-8", errors="ignore") as f:
988
+ content = f.read()
989
+ return self.hash_file(content)
990
+ except Exception:
991
+ return None
992
+
993
def scan_incremental(self, existing_map_path: str) -> dict[str, Any]:
    """Incrementally update an existing code map.

    Only re-analyzes files whose content hash differs from the one stored
    in the existing map (or that are new). Symbols of unchanged files are
    restored from the stored map instead of being re-parsed.

    The same file filters as ``scan`` apply (ignore patterns and the
    git-tracked filter); in addition, symlinked files are skipped.

    If the existing map cannot be read, is not valid JSON, or does not
    contain a ``files`` object, a full ``scan()`` is performed instead.

    Args:
        existing_map_path: Path to the existing .codegraph.json file.

    Returns:
        Dict containing the updated code map.

    Example:
        >>> mapper = CodeNavigator('/my/project')
        >>> result = mapper.scan_incremental('.codegraph.json')
        >>> print(result['stats'])
        {'files_processed': 5, 'files_unchanged': 137, 'files_added': 2, ...}
    """
    # Load existing map - only extract 'files' to minimize memory usage.
    # OSError covers missing/unreadable files; ValueError covers both
    # json.JSONDecodeError and UnicodeDecodeError.
    try:
        with open(existing_map_path, encoding="utf-8") as f:
            existing_map = json.load(f)
        if not isinstance(existing_map, dict):
            raise ValueError("top-level JSON value is not an object")
        existing_files = existing_map.get("files", {})
        if not isinstance(existing_files, dict):
            raise ValueError("'files' section is not an object")
        del existing_map  # Explicit cleanup of the full map
    except (OSError, ValueError) as e:
        print(f"Cannot load existing map ({e}), performing full scan", file=sys.stderr)
        return self.scan()

    print(f"Incremental scan at: {self.root_path}", file=sys.stderr)
    print(f"Existing map has {len(existing_files)} files", file=sys.stderr)

    # Initialize incremental stats
    self.stats = {
        "files_processed": 0,
        "files_unchanged": 0,
        "files_added": 0,
        "files_modified": 0,
        "files_deleted": 0,
        "symbols_found": 0,
        "errors": 0,
    }

    # Track which files we've seen in current scan
    current_files: dict[str, str] = {}  # rel_path -> hash

    # First pass: collect all current files and their hashes.
    # Files may be deleted/modified during the walk (TOCTOU); failures to
    # stat or read a file simply leave it out of the current set.
    for root, dirs, files in os.walk(self.root_path):
        dirs[:] = [d for d in dirs if not self.should_ignore(Path(root) / d)]

        for file in files:
            file_path = Path(root) / file
            if self.should_ignore(file_path):
                continue

            # Skip symlinks to prevent symlink attacks
            try:
                if file_path.is_symlink():
                    continue
            except OSError:
                continue

            # Same filter as scan(): without it an incremental run in
            # git-only mode would add untracked files to the map.
            if not self._is_git_tracked(file_path):
                continue

            language = self.get_language(file_path)
            if language:
                rel_path = str(file_path.relative_to(self.root_path))
                try:
                    current_hash = self.get_current_file_hash(file_path)
                    if current_hash:
                        current_files[rel_path] = current_hash
                except OSError:
                    # File disappeared or became inaccessible during scan
                    pass

    # Categorize files
    unchanged_files = []
    modified_files = []
    added_files = []

    for rel_path, current_hash in current_files.items():
        if rel_path in existing_files:
            existing_hash = existing_files[rel_path].get("hash", "")
            if current_hash == existing_hash:
                unchanged_files.append(rel_path)
            else:
                modified_files.append(rel_path)
        else:
            added_files.append(rel_path)

    # Files in existing map but not in current scan = deleted
    deleted_files = [f for f in existing_files if f not in current_files]

    print(f" Unchanged: {len(unchanged_files)}", file=sys.stderr)
    print(f" Modified: {len(modified_files)}", file=sys.stderr)
    print(f" Added: {len(added_files)}", file=sys.stderr)
    print(f" Deleted: {len(deleted_files)}", file=sys.stderr)

    # Preserve unchanged files' symbols
    for rel_path in unchanged_files:
        file_info = existing_files[rel_path]
        self.file_hashes[rel_path] = file_info.get("hash", "")

        # Convert stored symbols back to Symbol objects
        for sym_data in file_info.get("symbols", []):
            symbol = Symbol(
                name=sym_data["name"],
                type=sym_data["type"],
                file_path=rel_path,
                line_start=sym_data["lines"][0],
                line_end=sym_data["lines"][1],
                signature=sym_data.get("signature"),
                docstring=sym_data.get("docstring"),
                parent=sym_data.get("parent"),
                # Stored maps use None for "no deps/decorators".
                dependencies=sym_data.get("deps") or [],
                decorators=sym_data.get("decorators") or [],
                truncated=sym_data.get("truncated", False),
            )
            self.symbols.append(symbol)

    self.stats["files_unchanged"] = len(unchanged_files)

    # Analyze modified and added files.
    # TOCTOU mitigation - files may have changed or been deleted between
    # the hash check and analysis; such files are skipped and counted as
    # errors.
    files_to_analyze = modified_files + added_files
    for rel_path in files_to_analyze:
        file_path = self.root_path / rel_path
        try:
            # Check file still exists and is a regular file (not symlink)
            if not file_path.is_file() or file_path.is_symlink():
                print(
                    f" Skipping {rel_path}: file no longer exists or is symlink",
                    file=sys.stderr,
                )
                self.stats["errors"] += 1
                continue

            symbols = self.analyze_file(file_path)
            self.symbols.extend(symbols)
            self.stats["files_processed"] += 1
        except OSError as e:
            # File became inaccessible between hash check and analysis (TOCTOU)
            print(f" Skipping {rel_path}: {e}", file=sys.stderr)
            self.stats["errors"] += 1
            continue

    self.stats["files_added"] = len(added_files)
    self.stats["files_modified"] = len(modified_files)
    self.stats["files_deleted"] = len(deleted_files)
    self.stats["symbols_found"] = len(self.symbols)

    return self.generate_map()
1147
+
1148
def generate_map(self) -> dict[str, Any]:
    """Assemble the serialisable code map from the collected scan state.

    Reads ``self.file_hashes``, ``self.symbols``, ``self.stats`` and
    ``self.root_path``; nothing on the instance is modified.

    Returns:
        Dict with version, root, timestamp, stats, files map, and symbol index.
        The symbol index is keyed by lower-cased symbol name.
    """
    # Seed with every hashed file so files without symbols still appear.
    per_file: dict[str, dict[str, Any]] = {
        path: {"hash": digest, "symbols": []}
        for path, digest in self.file_hashes.items()
    }
    by_name: dict[str, list[dict[str, Any]]] = {}

    for sym in self.symbols:
        # A symbol may belong to a file that was never hashed; give it an
        # entry with an empty hash in that case.
        file_entry = per_file.setdefault(
            sym.file_path,
            {"hash": self.file_hashes.get(sym.file_path, ""), "symbols": []},
        )

        record: dict[str, Any] = {
            "name": sym.name,
            "type": sym.type,
            "lines": [sym.line_start, sym.line_end],
            "signature": sym.signature,
            "docstring": sym.docstring,
            "parent": sym.parent,
            # At most ten dependencies are stored per symbol.
            "deps": sym.dependencies[:10] if sym.dependencies else None,
            "decorators": sym.decorators or None,
        }
        # The truncated flag is emitted only when set, to keep output compact.
        if sym.truncated:
            record["truncated"] = True
        file_entry["symbols"].append(record)

        by_name.setdefault(sym.name.lower(), []).append(
            {
                "file": sym.file_path,
                "type": sym.type,
                "lines": [sym.line_start, sym.line_end],
                "parent": sym.parent,
            }
        )

    return {
        "version": "1.0",
        "root": str(self.root_path),
        "generated_at": datetime.now().isoformat(),
        "stats": self.stats,
        "files": per_file,
        "index": by_name,
    }
1206
+
1207
+
1208
def add_map_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the map command's options on ``parser``.

    Adds the positional ``path``, the ``-o/--output`` and ``-i/--ignore``
    options, and five boolean switches that all default to False.

    Args:
        parser: The argument parser to add arguments to.
    """
    parser.add_argument("path", help="Path to the codebase root directory")
    parser.add_argument(
        "-o",
        "--output",
        default=".codegraph.json",
        help="Output file path (default: .codegraph.json)",
    )
    parser.add_argument("-i", "--ignore", nargs="*", help="Additional patterns to ignore")

    # Plain on/off switches, registered in the order they appear in --help.
    switches = (
        ("--incremental", "Only update changed files (requires existing map)"),
        ("--git-only", "Only scan files tracked by git"),
        ("--use-gitignore", "Also ignore patterns from .gitignore"),
        ("--compact", "Output compact JSON (default: pretty-printed)"),
        ("--no-color", "Disable colored output"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)
1241
+
1242
+
1243
def run_map(args: argparse.Namespace) -> None:
    """Run the map command: scan, write the map file, and report the result.

    A relative ``args.output`` is resolved against ``args.path``. When
    ``--incremental`` is set and the output file already exists, the map is
    updated incrementally; otherwise a full scan runs. Human-readable
    progress goes to stderr and a JSON summary goes to stdout.

    Args:
        args: Parsed command-line arguments.
    """

    def report(line: str) -> None:
        # All human-facing status lines go to stderr; stdout is reserved
        # for the machine-readable summary.
        print(line, file=sys.stderr)

    patterns = DEFAULT_IGNORE_PATTERNS.copy()
    if args.ignore:
        patterns.extend(args.ignore)

    mapper = CodeNavigator(
        args.path,
        patterns,
        git_only=getattr(args, "git_only", False),
        use_gitignore=getattr(args, "use_gitignore", False),
    )

    output_path = args.output
    if not os.path.isabs(output_path):
        output_path = os.path.join(args.path, output_path)

    # Incremental mode needs an existing map to diff against.
    wants_incremental = getattr(args, "incremental", False)
    if wants_incremental and os.path.exists(output_path):
        code_map = mapper.scan_incremental(output_path)
    else:
        if wants_incremental:
            report(f"No existing map at {output_path}, performing full scan")
        code_map = mapper.scan()

    with open(output_path, "w", encoding="utf-8") as handle:
        json_options = {"separators": (",", ":")} if args.compact else {"indent": 2}
        json.dump(code_map, handle, **json_options)

    c = get_colors(no_color=args.no_color)
    stats = code_map["stats"]

    # Only incremental scans populate 'files_unchanged'.
    if "files_unchanged" not in stats:
        # Full scan
        report(f"\n{c.success('✓')} Code map generated: {c.cyan(output_path)}")
        report(f" Files processed: {c.green(str(stats['files_processed']))}")
        report(f" Symbols found: {c.green(str(stats['symbols_found']))}")
    else:
        # Incremental scan
        report(f"\n{c.success('✓')} Code map updated: {c.cyan(output_path)}")
        report(f" Unchanged: {c.dim(str(stats['files_unchanged']))}")
        report(f" Modified: {c.yellow(str(stats['files_modified']))}")
        report(f" Added: {c.green(str(stats['files_added']))}")
        report(f" Deleted: {c.magenta(str(stats['files_deleted']))}")
        report(f" Total symbols: {c.green(str(stats['symbols_found']))}")

    summary = {"output": output_path, "stats": stats}
    print(json.dumps(summary, **json_options))
1305
+
1306
+
1307
def main():
    """Entry point for the code mapper command line.

    Builds the argument parser (map options plus ``-v/--version``), parses
    ``sys.argv`` and hands the result to ``run_map``.

    Usage:
        codegraph-nav scan /path/to/project [-o OUTPUT] [-i IGNORE...] [--compact]

    Example:
        $ codegraph-nav scan /my/project -o .codegraph.json
    """
    cli = argparse.ArgumentParser(
        description="Generate a code map for token-efficient navigation",
        epilog="Example: codegraph-nav scan /my/project -o .codegraph.json",
    )
    add_map_arguments(cli)
    cli.add_argument("-v", "--version", action="version", version=f"%(prog)s {__version__}")
    run_map(cli.parse_args())


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()