iflow-mcp_kandrwmrtn-cplusplus_mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1042 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pure Python C++ Analyzer using libclang
4
+
5
+ This module provides C++ code analysis functionality using libclang bindings.
6
+ It's slower than the C++ implementation but more reliable and easier to debug.
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import re
12
+ import time
13
+ import threading
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from pathlib import Path
16
+ from typing import Dict, List, Optional, Any, Set, Tuple
17
+ from collections import defaultdict
18
+ import hashlib
19
+ import json
20
+ from .symbol_info import SymbolInfo
21
+ from .cache_manager import CacheManager
22
+ from .file_scanner import FileScanner
23
+ from .call_graph import CallGraphAnalyzer
24
+ from .search_engine import SearchEngine
25
+ from .cpp_analyzer_config import CppAnalyzerConfig
26
+
27
+ try:
28
+ import clang.cindex
29
+ from clang.cindex import Index, CursorKind, TranslationUnit, Config
30
+ except ImportError:
31
+ print("Error: clang package not found. Install with: pip install libclang", file=sys.stderr)
32
+ sys.exit(1)
33
+
34
+
35
+ class CppAnalyzer:
36
+ """
37
+ Pure Python C++ code analyzer using libclang.
38
+
39
+ This class provides code analysis functionality including:
40
+ - Class and struct discovery
41
+ - Function and method discovery
42
+ - Symbol search with regex patterns
43
+ - File-based filtering
44
+ """
45
+
46
def __init__(self, project_root: str):
    """Create an analyzer for the C++ project rooted at ``project_root``.

    Resolves the root path, loads the project configuration, creates the
    (initially empty) symbol indexes and the helper objects that work on
    them, and prints a start-up message to stderr.  This method does not
    call ``index_file`` or ``index_project`` itself.

    Args:
        project_root: Path of the project directory; stored resolved.
    """
    self.project_root = Path(project_root).resolve()
    # Index created at construction; index_file() parses with the
    # per-thread index returned by _get_thread_index() instead.
    self.index = Index.create()

    # Load project configuration
    self.config = CppAnalyzerConfig(self.project_root)

    # Indexes for fast lookup
    self.class_index: Dict[str, List[SymbolInfo]] = defaultdict(list)     # class/struct name -> symbols
    self.function_index: Dict[str, List[SymbolInfo]] = defaultdict(list)  # function/method name -> symbols
    self.file_index: Dict[str, List[SymbolInfo]] = defaultdict(list)      # file path -> symbols
    self.usr_index: Dict[str, SymbolInfo] = {}  # USR to symbol mapping

    # Initialize call graph analyzer
    self.call_graph_analyzer = CallGraphAnalyzer()

    # Initialize search engine
    # It receives the very same dict objects, so the indexes must be
    # mutated in place rather than rebound for searches to see updates.
    self.search_engine = SearchEngine(
        self.class_index,
        self.function_index,
        self.file_index,
        self.usr_index
    )

    # Track indexed files
    self.translation_units: Dict[str, TranslationUnit] = {}
    self.file_hashes: Dict[str, str] = {}  # file path -> content hash at last index

    # Threading
    self.index_lock = threading.Lock()  # guards the shared indexes
    self._thread_local = threading.local()  # holds one libclang Index per thread
    cpu_count = os.cpu_count() or 1
    # Two workers per CPU, never fewer than 1 and never more than 16.
    self.max_workers = max(1, min(16, cpu_count * 2))

    # Initialize cache manager and file scanner with config
    self.cache_manager = CacheManager(self.project_root)
    self.file_scanner = FileScanner(self.project_root)

    # Apply configuration to file scanner
    self.file_scanner.EXCLUDE_DIRS = set(self.config.get_exclude_directories())
    self.file_scanner.DEPENDENCY_DIRS = set(self.config.get_dependency_directories())

    # Keep cache_dir for compatibility
    self.cache_dir = self.cache_manager.cache_dir

    # Statistics
    self.last_index_time = 0      # duration of the last index_project() run, in seconds
    self.indexed_file_count = 0
    self.include_dependencies = self.config.get_include_dependencies()

    print(f"CppAnalyzer initialized for project: {self.project_root}", file=sys.stderr)
    if self.config.config_path.exists():
        print(f"Using project configuration from: {self.config.config_path}", file=sys.stderr)
99
+
100
+ def _get_file_hash(self, file_path: str) -> str:
101
+ """Get hash of file contents for change detection"""
102
+ return self.cache_manager.get_file_hash(file_path)
103
+
104
+ def _get_thread_index(self) -> Index:
105
+ """Return a thread-local libclang Index instance."""
106
+ index = getattr(self._thread_local, "index", None)
107
+ if index is None:
108
+ index = Index.create()
109
+ self._thread_local.index = index
110
+ return index
111
+
112
+ def _save_file_cache(self, file_path: str, symbols: List[SymbolInfo], file_hash: str):
113
+ """Save parsed symbols for a single file to cache"""
114
+ self.cache_manager.save_file_cache(file_path, symbols, file_hash)
115
+
116
+ def _load_file_cache(self, file_path: str, current_hash: str) -> Optional[List[SymbolInfo]]:
117
+ """Load cached symbols for a file if still valid"""
118
+ return self.cache_manager.load_file_cache(file_path, current_hash)
119
+
120
+ def _is_project_file(self, file_path: str) -> bool:
121
+ """Check if file is part of the project (not a dependency)"""
122
+ return self.file_scanner.is_project_file(file_path)
123
+
124
+ def _should_skip_file(self, file_path: str) -> bool:
125
+ """Check if file should be skipped"""
126
+ # Update file scanner with current dependencies setting
127
+ self.file_scanner.include_dependencies = self.include_dependencies
128
+ return self.file_scanner.should_skip_file(file_path)
129
+
130
+ def _find_cpp_files(self, include_dependencies: bool = False) -> List[str]:
131
+ """Find all C++ files in the project"""
132
+ # Update file scanner with dependencies setting
133
+ self.file_scanner.include_dependencies = include_dependencies
134
+ return self.file_scanner.find_cpp_files()
135
+
136
+ def _get_base_classes(self, cursor) -> List[str]:
137
+ """Extract base class names from a class cursor"""
138
+ base_classes = []
139
+ for child in cursor.get_children():
140
+ if child.kind == CursorKind.CXX_BASE_SPECIFIER:
141
+ # Get the referenced class name
142
+ base_type = child.type.spelling
143
+ # Clean up the type name (remove "class " prefix if present)
144
+ if base_type.startswith("class "):
145
+ base_type = base_type[6:]
146
+ base_classes.append(base_type)
147
+ return base_classes
148
+
149
+ def _process_cursor(self, cursor, file_filter: Optional[str] = None, parent_class: str = "", parent_function_usr: str = ""):
150
+ """Process a cursor and its children"""
151
+ # Skip if in different file than we're indexing
152
+ if cursor.location.file and file_filter:
153
+ if cursor.location.file.name != file_filter:
154
+ return
155
+
156
+ kind = cursor.kind
157
+
158
+ # Process classes and structs
159
+ if kind in (CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL):
160
+ if cursor.spelling:
161
+ # Get base classes
162
+ base_classes = self._get_base_classes(cursor)
163
+
164
+ info = SymbolInfo(
165
+ name=cursor.spelling,
166
+ kind="class" if kind == CursorKind.CLASS_DECL else "struct",
167
+ file=cursor.location.file.name if cursor.location.file else "",
168
+ line=cursor.location.line,
169
+ column=cursor.location.column,
170
+ is_project=self._is_project_file(cursor.location.file.name) if cursor.location.file else False,
171
+ parent_class="", # Classes don't have parent classes in this context
172
+ base_classes=base_classes,
173
+ usr=cursor.get_usr() if cursor.get_usr() else ""
174
+ )
175
+
176
+ with self.index_lock:
177
+ self.class_index[info.name].append(info)
178
+ if info.usr:
179
+ self.usr_index[info.usr] = info
180
+ if info.file:
181
+ # Ensure file_index list exists
182
+ if info.file not in self.file_index:
183
+ self.file_index[info.file] = []
184
+ self.file_index[info.file].append(info)
185
+
186
+ # Process children of this class with the class as parent
187
+ for child in cursor.get_children():
188
+ self._process_cursor(child, file_filter, cursor.spelling)
189
+ return # Don't process children again below
190
+
191
+ # Process functions and methods
192
+ elif kind in (CursorKind.FUNCTION_DECL, CursorKind.CXX_METHOD):
193
+ if cursor.spelling:
194
+ # Get function signature
195
+ signature = ""
196
+ if cursor.type:
197
+ signature = cursor.type.spelling
198
+
199
+ function_usr = cursor.get_usr() if cursor.get_usr() else ""
200
+
201
+ info = SymbolInfo(
202
+ name=cursor.spelling,
203
+ kind="function" if kind == CursorKind.FUNCTION_DECL else "method",
204
+ file=cursor.location.file.name if cursor.location.file else "",
205
+ line=cursor.location.line,
206
+ column=cursor.location.column,
207
+ signature=signature,
208
+ is_project=self._is_project_file(cursor.location.file.name) if cursor.location.file else False,
209
+ parent_class=parent_class if kind == CursorKind.CXX_METHOD else "",
210
+ usr=function_usr
211
+ )
212
+
213
+ with self.index_lock:
214
+ self.function_index[info.name].append(info)
215
+ if info.usr:
216
+ self.usr_index[info.usr] = info
217
+ if info.file:
218
+ # Ensure file_index list exists
219
+ if info.file not in self.file_index:
220
+ self.file_index[info.file] = []
221
+ self.file_index[info.file].append(info)
222
+
223
+ # Process function body to find calls
224
+ for child in cursor.get_children():
225
+ self._process_cursor(child, file_filter, parent_class, function_usr)
226
+ return # Don't process children again below
227
+
228
+ # Process function calls within function bodies
229
+ elif kind == CursorKind.CALL_EXPR and parent_function_usr:
230
+ # This is a function call inside a function
231
+ referenced = cursor.referenced
232
+ if referenced and referenced.get_usr():
233
+ called_usr = referenced.get_usr()
234
+ # Track the call relationship
235
+ with self.index_lock:
236
+ self.call_graph_analyzer.add_call(parent_function_usr, called_usr)
237
+
238
+ # Recurse into children (with current parent_class and parent_function context)
239
+ for child in cursor.get_children():
240
+ self._process_cursor(child, file_filter, parent_class, parent_function_usr)
241
+
242
def index_file(self, file_path: str, force: bool = False) -> tuple[bool, bool]:
    """Index a single C++ file.

    Unless ``force`` is set, symbols are first looked up in the per-file
    cache using the file's current content hash.  On a cache miss the file
    is parsed with the calling thread's libclang index, its symbols are
    collected via ``_process_cursor``, annotated with call-graph data and
    written back to the per-file cache.

    Args:
        file_path: Path of the file; converted to an absolute path and
            used in that form as the key of all per-file bookkeeping.
        force: Skip the per-file cache and always re-parse.

    Returns:
        ``(success, was_cached)`` - ``success`` tells whether indexing
        succeeded, ``was_cached`` whether the symbols came from the cache.
        Every exit path returns this 2-tuple, so callers can always unpack
        it.  Parse errors are reported as ``(False, False)``, not raised.
    """
    file_path = os.path.abspath(file_path)
    current_hash = self._get_file_hash(file_path)

    def purge_from_name_indexes():
        # Drop this file's previous symbols from the class/function
        # indexes.  The caller must already hold self.index_lock.
        if file_path not in self.file_index:
            return
        for info in self.file_index[file_path]:
            if info.kind in ("class", "struct"):
                self.class_index[info.name] = [
                    i for i in self.class_index[info.name] if i.file != file_path
                ]
            else:
                self.function_index[info.name] = [
                    i for i in self.function_index[info.name] if i.file != file_path
                ]

    # Try to load from per-file cache first
    if not force:
        cached_symbols = self._load_file_cache(file_path, current_hash)
        if cached_symbols is not None:
            with self.index_lock:
                purge_from_name_indexes()

                # Add cached symbols
                self.file_index[file_path] = cached_symbols
                for symbol in cached_symbols:
                    if symbol.kind in ("class", "struct"):
                        self.class_index[symbol.name].append(symbol)
                    else:
                        self.function_index[symbol.name].append(symbol)

                    if symbol.usr:
                        self.usr_index[symbol.usr] = symbol

                    # Restore call graph relationships stored with the symbol
                    if symbol.calls:
                        for called_usr in symbol.calls:
                            self.call_graph_analyzer.add_call(symbol.usr, called_usr)
                    if symbol.called_by:
                        for caller_usr in symbol.called_by:
                            self.call_graph_analyzer.add_call(caller_usr, symbol.usr)

                self.file_hashes[file_path] = current_hash
            return (True, True)  # Successfully loaded from cache

    try:
        args = [
            '-std=c++17',
            '-I.',
            f'-I{self.project_root}',
            f'-I{self.project_root}/src',
            f'-I{self.project_root}/include',
            '-DWIN32',
            '-D_WIN32',
            '-D_WINDOWS',
            '-DNOMINMAX',
            '-x', 'c++'
        ]

        # Add vcpkg includes if available
        vcpkg_include = self.project_root / "vcpkg_installed" / "x64-windows" / "include"
        if vcpkg_include.exists():
            args.append(f'-I{vcpkg_include}')

        # Add the first common vcpkg location that exists
        vcpkg_paths = [
            "C:/vcpkg/installed/x64-windows/include",
            "C:/dev/vcpkg/installed/x64-windows/include"
        ]
        for path in vcpkg_paths:
            if Path(path).exists():
                args.append(f'-I{path}')
                break

        # Function bodies are not skipped so that call expressions can be
        # recorded for call graph analysis.
        index = self._get_thread_index()
        tu = index.parse(
            file_path,
            args=args,
            options=TranslationUnit.PARSE_INCOMPLETE |
                    TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
        )

        if not tu:
            print(f"Failed to parse {file_path}", file=sys.stderr)
            # Must be a 2-tuple like every other exit: callers unpack it.
            return (False, False)

        # Diagnostics are deliberately not printed (too noisy); whatever
        # could be parsed is processed.

        # Clear old entries for this file
        with self.index_lock:
            purge_from_name_indexes()
            if file_path in self.file_index:
                self.file_index[file_path].clear()

        collected_symbols = []

        # Process the translation unit (modifies indexes)
        self._process_cursor(tu.cursor, file_path)

        # Get the symbols we just added for this file
        with self.index_lock:
            if file_path in self.file_index:
                collected_symbols = self.file_index[file_path].copy()

        # Populate call graph info in symbols before caching
        for symbol in collected_symbols:
            if symbol.usr and symbol.kind in ("function", "method"):
                calls = self.call_graph_analyzer.find_callees(symbol.usr)
                if calls:
                    symbol.calls = list(calls)
                callers = self.call_graph_analyzer.find_callers(symbol.usr)
                if callers:
                    symbol.called_by = list(callers)

        # Save to per-file cache (even if empty - to mark as successfully parsed)
        self._save_file_cache(file_path, collected_symbols, current_hash)

        # Update tracking
        with self.index_lock:
            self.translation_units[file_path] = tu
            self.file_hashes[file_path] = current_hash

        return (True, False)  # Success, not from cache

    except Exception:
        # Deliberately quiet: per-file errors are too noisy to print, the
        # caller only needs to know that this file failed.
        return (False, False)  # Failed, not from cache
393
+
394
def index_project(self, force: bool = False, include_dependencies: bool = True) -> int:
    """Index every C++ file of the project and return how many succeeded.

    Args:
        force: Ignore the saved project cache and pass ``force`` on to
            ``index_file`` for every file.
        include_dependencies: Whether the file scan covers dependencies;
            also stored on the analyzer.

    Returns:
        ``self.indexed_file_count`` when the saved cache could be loaded
        (after refreshing changed files), ``0`` when no files were found,
        otherwise the number of files indexed successfully in this run.

    Files are indexed on a thread pool of ``self.max_workers`` workers and
    progress is reported on stderr.
    """
    started_at = time.time()

    # Has to be stored before _load_cache() runs, because that reads it.
    self.include_dependencies = include_dependencies

    if not force and self._load_cache():
        updated = self.refresh_if_needed()
        if updated > 0:
            print(f"Using cached index (updated {updated} files)", file=sys.stderr)
        else:
            print("Using cached index", file=sys.stderr)
        return self.indexed_file_count

    print(f"Finding C++ files (include_dependencies={include_dependencies})...", file=sys.stderr)
    files = self._find_cpp_files(include_dependencies=include_dependencies)

    if not files:
        print("No C++ files found in project", file=sys.stderr)
        return 0

    total = len(files)
    print(f"Found {total} C++ files to index", file=sys.stderr)

    indexed_count = cache_hits = failed_count = 0
    last_report_time = time.time()

    # Treat stderr as interactive only when it is a TTY and neither of the
    # session environment variables is set; otherwise report less often.
    is_terminal = (hasattr(sys.stderr, 'isatty') and sys.stderr.isatty() and
                   not os.environ.get('MCP_SESSION_ID') and
                   not os.environ.get('CLAUDE_CODE_SESSION'))
    report_every, max_silence = (5, 2.0) if is_terminal else (50, 5.0)

    with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
        pending = {}
        for raw_path in files:
            absolute = os.path.abspath(raw_path)
            pending[pool.submit(self.index_file, absolute, force)] = absolute

        for processed, future in enumerate(as_completed(pending), start=1):
            file_path = pending[future]
            try:
                success, was_cached = future.result()
            except Exception as exc:
                print(f"Error indexing {file_path}: {exc}", file=sys.stderr)
                success, was_cached = False, False

            if success:
                indexed_count += 1
                if was_cached:
                    cache_hits += 1
            else:
                failed_count += 1

            now = time.time()
            should_report = (
                (is_terminal and processed <= 5)
                or processed % report_every == 0
                or (now - last_report_time) > max_silence
                or processed == total
            )
            if not should_report:
                continue

            elapsed = now - started_at
            rate = processed / elapsed if elapsed > 0 else 0
            eta = (total - processed) / rate if rate > 0 else 0
            cache_rate = (cache_hits * 100 // processed) if processed > 0 else 0

            message = (
                f"Progress: {processed}/{total} files ({100 * processed // total}%) - "
                f"Success: {indexed_count} - Failed: {failed_count} - "
                f"Cache: {cache_hits} ({cache_rate}%) - {rate:.1f} files/sec - ETA: {eta:.0f}s"
            )
            if is_terminal:
                # Erase the current line and redraw the progress in place.
                print(f"\033[2K\r{message}", end='', file=sys.stderr, flush=True)
            else:
                print(message, file=sys.stderr, flush=True)

            last_report_time = now

    self.indexed_file_count = indexed_count
    self.last_index_time = time.time() - started_at

    with self.index_lock:
        class_count = len(self.class_index)
        function_count = len(self.function_index)

    # The in-place progress line has no trailing newline; finish it.
    if is_terminal:
        print("", file=sys.stderr)
    print(f"Indexing complete in {self.last_index_time:.2f}s", file=sys.stderr)
    print(f"Indexed {indexed_count}/{total} files successfully ({cache_hits} from cache, {failed_count} failed)", file=sys.stderr)
    print(f"Found {class_count} class names, {function_count} function names", file=sys.stderr)

    if failed_count > 0:
        print(f"Note: {failed_count} files failed to parse - this is normal for complex projects", file=sys.stderr)

    # Save overall cache and progress summary
    self._save_cache()
    self._save_progress_summary(indexed_count, total, cache_hits, failed_count)

    return indexed_count
521
+
522
+ def _save_cache(self):
523
+ """Save index to cache file"""
524
+ self.cache_manager.save_cache(
525
+ self.class_index,
526
+ self.function_index,
527
+ self.file_hashes,
528
+ self.indexed_file_count,
529
+ self.include_dependencies
530
+ )
531
+
532
+ def _load_cache(self) -> bool:
533
+ """Load index from cache file"""
534
+ cache_data = self.cache_manager.load_cache(self.include_dependencies)
535
+ if not cache_data:
536
+ return False
537
+
538
+ try:
539
+ # Load indexes
540
+ self.class_index.clear()
541
+ for name, infos in cache_data.get("class_index", {}).items():
542
+ self.class_index[name] = [SymbolInfo(**info) for info in infos]
543
+
544
+ self.function_index.clear()
545
+ for name, infos in cache_data.get("function_index", {}).items():
546
+ self.function_index[name] = [SymbolInfo(**info) for info in infos]
547
+
548
+ # Rebuild file index mapping from loaded symbols
549
+ self.file_index.clear()
550
+ for infos in self.class_index.values():
551
+ for symbol in infos:
552
+ if symbol.file:
553
+ self.file_index[symbol.file].append(symbol)
554
+ for infos in self.function_index.values():
555
+ for symbol in infos:
556
+ if symbol.file:
557
+ self.file_index[symbol.file].append(symbol)
558
+
559
+ self.file_hashes = cache_data.get("file_hashes", {})
560
+ self.indexed_file_count = cache_data.get("indexed_file_count", 0)
561
+
562
+ # Rebuild USR index and call graphs from loaded data
563
+ self.usr_index.clear()
564
+ self.call_graph_analyzer.clear()
565
+
566
+ # Rebuild from all loaded symbols
567
+ all_symbols = []
568
+ for class_list in self.class_index.values():
569
+ for symbol in class_list:
570
+ if symbol.usr:
571
+ self.usr_index[symbol.usr] = symbol
572
+ all_symbols.append(symbol)
573
+
574
+ for func_list in self.function_index.values():
575
+ for symbol in func_list:
576
+ if symbol.usr:
577
+ self.usr_index[symbol.usr] = symbol
578
+ all_symbols.append(symbol)
579
+
580
+ # Rebuild call graph from all symbols
581
+ self.call_graph_analyzer.rebuild_from_symbols(all_symbols)
582
+
583
+ print(f"Loaded cache with {len(self.class_index)} classes, {len(self.function_index)} functions",
584
+ file=sys.stderr)
585
+ return True
586
+
587
+ except Exception as e:
588
+ print(f"Error loading cache: {e}", file=sys.stderr)
589
+ return False
590
+
591
+ def _save_progress_summary(self, indexed_count: int, total_files: int, cache_hits: int, failed_count: int = 0):
592
+ """Save a summary of indexing progress"""
593
+ status = "complete" if indexed_count + failed_count == total_files else "interrupted"
594
+ self.cache_manager.save_progress(
595
+ total_files,
596
+ indexed_count,
597
+ failed_count,
598
+ cache_hits,
599
+ self.last_index_time,
600
+ len(self.class_index),
601
+ len(self.function_index),
602
+ status
603
+ )
604
+
605
def search_classes(self, pattern: str, project_only: bool = True) -> List[Dict[str, Any]]:
    """Return the classes whose name matches ``pattern``.

    The search is delegated to the search engine.  An invalid regular
    expression is reported on stderr and yields an empty list.
    """
    try:
        matches = self.search_engine.search_classes(pattern, project_only)
    except re.error as e:
        print(f"Invalid regex pattern: {e}", file=sys.stderr)
        return []
    return matches
612
+
613
def search_functions(self, pattern: str, project_only: bool = True, class_name: Optional[str] = None) -> List[Dict[str, Any]]:
    """Return the functions whose name matches ``pattern``.

    ``class_name`` is forwarded to the search engine to narrow the search
    to one class.  An invalid regular expression is reported on stderr and
    yields an empty list.
    """
    try:
        matches = self.search_engine.search_functions(pattern, project_only, class_name)
    except re.error as e:
        print(f"Invalid regex pattern: {e}", file=sys.stderr)
        return []
    return matches
620
+
621
def get_stats(self) -> Dict[str, int]:
    """Return counts of distinct class names, function names and indexed files.

    The three values are read while holding ``index_lock``.
    """
    with self.index_lock:
        classes = len(self.class_index)
        functions = len(self.function_index)
        files = self.indexed_file_count

    return {
        "class_count": classes,
        "function_count": functions,
        "file_count": files,
    }
629
+
630
def refresh_if_needed(self) -> int:
    """Bring the index up to date with the files currently on disk.

    Three passes are made:

    1. Tracked files that the scan no longer reports are removed from all
       indexes, from the tracking dictionaries and from the per-file cache.
    2. Tracked files that still exist and whose content hash changed are
       re-indexed with ``force=True``.
    3. Files reported by the scan but not tracked yet are indexed.

    Scanned paths are converted with ``os.path.abspath`` before being
    compared, because ``index_file`` keys its bookkeeping by absolute
    path; a relative path from the scanner would otherwise look
    "deleted" and "new" at the same time.

    ``indexed_file_count`` is updated *before* the cache is saved so the
    persisted count matches the persisted file hashes.

    Returns:
        Number of files that were (re-)indexed successfully.  Removed
        files are not counted.
    """
    refreshed = 0

    current_files = {
        os.path.abspath(path)
        for path in self._find_cpp_files(self.include_dependencies)
    }
    tracked_files = set(self.file_hashes)

    # Pass 1: forget files that disappeared.
    deleted_files = tracked_files - current_files
    for file_path in deleted_files:
        self._remove_file_from_indexes(file_path)
        self.file_hashes.pop(file_path, None)
        self.translation_units.pop(file_path, None)
        self.cache_manager.remove_file_cache(file_path)
    deleted = len(deleted_files)

    # Pass 2: re-index tracked files whose content changed.  Iterate over
    # a snapshot because index_file() writes to self.file_hashes.
    for file_path in list(self.file_hashes):
        if not os.path.exists(file_path):
            continue  # Still reported by the scan but gone from disk
        if self._get_file_hash(file_path) != self.file_hashes.get(file_path):
            success, _ = self.index_file(file_path, force=True)
            if success:
                refreshed += 1

    # Pass 3: index files that are not tracked yet.
    for file_path in current_files - tracked_files:
        success, _ = self.index_file(file_path, force=False)
        if success:
            refreshed += 1

    # Keep tracked file count in sync with current state (before saving).
    self.indexed_file_count = len(self.file_hashes)

    if refreshed > 0 or deleted > 0:
        self._save_cache()
        if deleted > 0:
            print(f"Removed {deleted} deleted files from indexes", file=sys.stderr)

    return refreshed
681
+
682
+ def _remove_file_from_indexes(self, file_path: str):
683
+ """Remove all symbols from a deleted file from all indexes"""
684
+ with self.index_lock:
685
+ # Get all symbols that were in this file
686
+ symbols_to_remove = self.file_index.get(file_path, [])
687
+
688
+ # Remove from class_index
689
+ for symbol in symbols_to_remove:
690
+ if symbol.kind in ("class", "struct"):
691
+ if symbol.name in self.class_index:
692
+ self.class_index[symbol.name] = [
693
+ info for info in self.class_index[symbol.name]
694
+ if info.file != file_path
695
+ ]
696
+ # Remove empty entries
697
+ if not self.class_index[symbol.name]:
698
+ del self.class_index[symbol.name]
699
+
700
+ # Remove from function_index
701
+ elif symbol.kind in ("function", "method"):
702
+ if symbol.name in self.function_index:
703
+ self.function_index[symbol.name] = [
704
+ info for info in self.function_index[symbol.name]
705
+ if info.file != file_path
706
+ ]
707
+ # Remove empty entries
708
+ if not self.function_index[symbol.name]:
709
+ del self.function_index[symbol.name]
710
+
711
+ # Remove from usr_index
712
+ if symbol.usr and symbol.usr in self.usr_index:
713
+ del self.usr_index[symbol.usr]
714
+
715
+ # Remove from call graph
716
+ if symbol.usr:
717
+ self.call_graph_analyzer.remove_symbol(symbol.usr)
718
+
719
+ # Remove from file_index
720
+ if file_path in self.file_index:
721
+ del self.file_index[file_path]
722
+
723
def get_class_info(self, class_name: str) -> Optional[Dict[str, Any]]:
    """Look up detailed information about ``class_name`` via the search engine."""
    details = self.search_engine.get_class_info(class_name)
    return details
726
+
727
def get_function_signature(self, function_name: str, class_name: Optional[str] = None) -> List[str]:
    """Return the search engine's signature details for ``function_name``.

    ``class_name`` is forwarded so the lookup can be limited to one class.
    """
    signatures = self.search_engine.get_function_signature(function_name, class_name)
    return signatures
730
+
731
def search_symbols(self, pattern: str, project_only: bool = True, symbol_types: Optional[List[str]] = None) -> Dict[str, List[Dict[str, Any]]]:
    """
    Search classes and functions whose name matches a pattern.

    Args:
        pattern: Regex pattern to search for
        project_only: Only include project files (exclude dependencies)
        symbol_types: Symbol types to include, chosen from
                      ['class', 'struct', 'function', 'method'].
                      None means all types.

    Returns:
        The search engine's result: a dictionary with the keys 'classes'
        and 'functions'.  For an invalid regex the error is printed to
        stderr and both lists are empty.
    """
    try:
        found = self.search_engine.search_symbols(pattern, project_only, symbol_types)
    except re.error as e:
        print(f"Invalid regex pattern: {e}", file=sys.stderr)
        return {"classes": [], "functions": []}
    return found
749
+
750
def get_derived_classes(self, class_name: str, project_only: bool = True) -> List[Dict[str, Any]]:
    """
    List the classes that name ``class_name`` among their base classes.

    Args:
        class_name: Name of the base class (compared for exact equality
            with the recorded base class names)
        project_only: Only include project classes (exclude dependencies)

    Returns:
        One dictionary per matching class with its name, kind, location,
        project flag and base classes
    """
    def qualifies(info):
        if project_only and not info.is_project:
            return False
        return class_name in info.base_classes

    with self.index_lock:
        return [
            {
                "name": info.name,
                "kind": info.kind,
                "file": info.file,
                "line": info.line,
                "column": info.column,
                "is_project": info.is_project,
                "base_classes": info.base_classes,
            }
            for infos in self.class_index.values()
            for info in infos
            if qualifies(info)
        ]
780
+
781
def get_class_hierarchy(self, class_name: str) -> Optional[Dict[str, Any]]:
    """
    Get the complete inheritance hierarchy for a class.

    Args:
        class_name: Name of the class to analyze

    Returns:
        None when ``get_class_info`` knows no class of that name,
        otherwise a dictionary containing:
        - class_info: Information about the class itself
        - base_classes: Direct base class names, de-duplicated, in the
          order they were first recorded
        - derived_classes: Direct derived classes
        - base_hierarchy: Recursive tree of base classes
        - derived_hierarchy: Recursive tree of derived classes
    """
    class_info = self.get_class_info(class_name)
    if not class_info:
        return None

    # Several symbols can share the name; merge the bases of all of them.
    with self.index_lock:
        declared = [
            base
            for info in self.class_index.get(class_name, [])
            for base in info.base_classes
        ]

    # dict.fromkeys removes duplicates but, unlike set(), keeps a stable
    # order, so repeated calls return the same list.
    base_classes = list(dict.fromkeys(declared))

    return {
        "class_info": class_info,
        "base_classes": base_classes,
        "derived_classes": self.get_derived_classes(class_name),
        "base_hierarchy": self._get_base_hierarchy(class_name),
        "derived_hierarchy": self._get_derived_hierarchy(class_name),
    }
822
+
823
+ def _get_base_hierarchy(self, class_name: str, visited: Optional[Set[str]] = None) -> Dict[str, Any]:
824
+ """Recursively get base class hierarchy"""
825
+ if visited is None:
826
+ visited = set()
827
+
828
+ if class_name in visited:
829
+ return {"name": class_name, "circular_reference": True}
830
+
831
+ visited.add(class_name)
832
+
833
+ # Get base classes for this class
834
+ base_classes = []
835
+ with self.index_lock:
836
+ for infos in self.class_index.get(class_name, []):
837
+ base_classes.extend(infos.base_classes)
838
+
839
+ base_classes = list(set(base_classes))
840
+
841
+ # Recursively get hierarchy for each base class
842
+ base_hierarchies = []
843
+ for base in base_classes:
844
+ base_hierarchies.append(self._get_base_hierarchy(base, visited.copy()))
845
+
846
+ return {
847
+ "name": class_name,
848
+ "base_classes": base_hierarchies
849
+ }
850
+
851
def _get_derived_hierarchy(self, class_name: str, visited: Optional[Set[str]] = None) -> Dict[str, Any]:
    """Recursively build the tree of classes deriving from ``class_name``.

    Args:
        class_name: Name of the class whose descendants are resolved.
        visited: Class names already seen on the current path (cycle guard).
            Top-level callers leave it as ``None``; a set that is passed in
            gets ``class_name`` added to it.

    Returns:
        ``{"name": class_name, "derived_classes": [...]}`` with one nested
        entry of the same shape per derived class, or
        ``{"name": class_name, "circular_reference": True}`` if the class
        was already visited on this path.
    """
    seen = set() if visited is None else visited

    # Guard clause: a repeated name on the same path means a cycle.
    if class_name in seen:
        return {"name": class_name, "circular_reference": True}
    seen.add(class_name)

    # Derived classes are looked up without the project-only filter.
    children = self.get_derived_classes(class_name, project_only=False)

    # Each child explores with its own snapshot of the path so sibling
    # branches do not influence each other's cycle detection.
    subtrees = [
        self._get_derived_hierarchy(child["name"], seen.copy())
        for child in children
    ]

    return {"name": class_name, "derived_classes": subtrees}
873
+
874
def find_callers(self, function_name: str, class_name: str = "") -> List[Dict[str, Any]]:
    """Find all functions that call the specified function.

    Args:
        function_name: Exact name of the target function. It is regex-escaped
            and anchored before being handed to ``search_functions``.
        class_name: Forwarded to ``search_functions`` as its ``class_name``
            argument; defaults to the empty string.

    Returns:
        One dict per distinct caller with the keys ``name``, ``kind``,
        ``file``, ``line``, ``column``, ``signature``, ``parent_class`` and
        ``is_project``. Callers whose USR is not present in ``usr_index`` are
        omitted. A caller that calls several matching targets (for example
        overloads) is reported only once.
    """
    # Find the target function(s); project_only=False includes non-project symbols.
    target_functions = self.search_functions(f"^{re.escape(function_name)}$",
                                             project_only=False,
                                             class_name=class_name)

    # Map each search hit back to its indexed symbol (same file and line) to
    # obtain the USR. A dict serves as an ordered set so that targets are
    # processed in a stable order instead of arbitrary set order.
    target_usrs: Dict[str, None] = {}
    for func in target_functions:
        for symbol in self.function_index.get(func['name'], []):
            if symbol.usr and symbol.file == func['file'] and symbol.line == func['line']:
                target_usrs[symbol.usr] = None

    results = []
    reported = set()  # caller USRs already emitted, to avoid duplicate rows
    for usr in target_usrs:
        for caller_usr in self.call_graph_analyzer.find_callers(usr):
            if caller_usr in reported or caller_usr not in self.usr_index:
                continue
            reported.add(caller_usr)
            caller_info = self.usr_index[caller_usr]
            results.append({
                "name": caller_info.name,
                "kind": caller_info.kind,
                "file": caller_info.file,
                "line": caller_info.line,
                "column": caller_info.column,
                "signature": caller_info.signature,
                "parent_class": caller_info.parent_class,
                "is_project": caller_info.is_project
            })

    return results
909
+
910
def find_callees(self, function_name: str, class_name: str = "") -> List[Dict[str, Any]]:
    """Find all functions called by the specified function.

    Args:
        function_name: Exact name of the calling function. It is
            regex-escaped and anchored before being handed to
            ``search_functions``.
        class_name: Forwarded to ``search_functions`` as its ``class_name``
            argument; defaults to the empty string.

    Returns:
        One dict per distinct callee with the keys ``name``, ``kind``,
        ``file``, ``line``, ``column``, ``signature``, ``parent_class`` and
        ``is_project``. Callees whose USR is not present in ``usr_index`` are
        omitted. A callee reached from several matching functions (for
        example overloads) is reported only once.
    """
    # Find the target function(s); project_only=False includes non-project symbols.
    target_functions = self.search_functions(f"^{re.escape(function_name)}$",
                                             project_only=False,
                                             class_name=class_name)

    # Map each search hit back to its indexed symbol (same file and line) to
    # obtain the USR. A dict serves as an ordered set so that targets are
    # processed in a stable order instead of arbitrary set order.
    target_usrs: Dict[str, None] = {}
    for func in target_functions:
        for symbol in self.function_index.get(func['name'], []):
            if symbol.usr and symbol.file == func['file'] and symbol.line == func['line']:
                target_usrs[symbol.usr] = None

    results = []
    reported = set()  # callee USRs already emitted, to avoid duplicate rows
    for usr in target_usrs:
        for callee_usr in self.call_graph_analyzer.find_callees(usr):
            if callee_usr in reported or callee_usr not in self.usr_index:
                continue
            reported.add(callee_usr)
            callee_info = self.usr_index[callee_usr]
            results.append({
                "name": callee_info.name,
                "kind": callee_info.kind,
                "file": callee_info.file,
                "line": callee_info.line,
                "column": callee_info.column,
                "signature": callee_info.signature,
                "parent_class": callee_info.parent_class,
                "is_project": callee_info.is_project
            })

    return results
945
+
946
def get_call_path(self, from_function: str, to_function: str, max_depth: int = 10) -> List[List[str]]:
    """Find call paths from one function to another with a breadth-first search.

    Args:
        from_function: Exact name of the function the search starts from.
        to_function: Exact name of the function the search tries to reach.
        max_depth: Number of BFS levels examined per start function. The start
            function is level 0, so a returned path holds at most
            ``max_depth`` functions.

    Returns:
        A list of paths. Each path lists the functions from source to target,
        written as ``Class::name`` when the symbol has a parent class and as
        the bare name otherwise. USRs missing from ``usr_index`` are left out
        of the rendered path. Returns an empty list when either name matches
        no function.
    """
    def usrs_for(matches):
        # Map search hits back to indexed symbols (same file and line) and
        # keep the non-empty USRs.
        return {
            symbol.usr
            for func in matches
            for symbol in self.function_index.get(func['name'], [])
            if symbol.usr and symbol.file == func['file'] and symbol.line == func['line']
        }

    def label(usr):
        info = self.usr_index[usr]
        return f"{info.parent_class}::{info.name}" if info.parent_class else info.name

    from_funcs = self.search_functions(f"^{re.escape(from_function)}$", project_only=False)
    to_funcs = self.search_functions(f"^{re.escape(to_function)}$", project_only=False)
    if not (from_funcs and to_funcs):
        return []

    from_usrs = usrs_for(from_funcs)
    to_usrs = usrs_for(to_funcs)

    paths = []
    for start_usr in from_usrs:
        # Level-by-level BFS: ``frontier`` holds (usr, path so far) pairs of
        # the current level, ``seen`` prevents revisiting a function.
        frontier = [(start_usr, [start_usr])]
        seen = {start_usr}
        level = 0

        while frontier and level < max_depth:
            upcoming = []
            for node, trail in frontier:
                if node in to_usrs:
                    # Target reached: render the USR trail as readable names
                    # and do not expand beyond the target.
                    paths.append([label(usr) for usr in trail if usr in self.usr_index])
                    continue

                for callee_usr in self.call_graph_analyzer.find_callees(node):
                    if callee_usr in seen:
                        continue
                    seen.add(callee_usr)
                    upcoming.append((callee_usr, trail + [callee_usr]))

            frontier = upcoming
            level += 1

    return paths
1000
+
1001
def find_in_file(self, file_path: str, pattern: str) -> List[Dict[str, Any]]:
    """Search for class and function symbols within a specific file.

    Args:
        file_path: Absolute path, relative path, or trailing path fragment
            (e.g. ``"src/foo.cpp"``) that identifies the file.
        pattern: Pattern forwarded to ``search_classes`` and
            ``search_functions``.

    Returns:
        The matching class entries followed by the matching function entries
        whose ``file`` is the requested file. A symbol matches when its
        resolved path equals the resolved ``file_path``, or when
        ``file_path`` equals its path or is a suffix of it starting at a
        directory boundary. Symbols without a file are skipped.
    """
    # Search in both class and function results
    all_classes = self.search_classes(pattern, project_only=False)
    all_functions = self.search_functions(pattern, project_only=False)

    abs_file_path = str(Path(file_path).resolve())

    # Compare with forward slashes so "src\\a.cpp" and "src/a.cpp" agree.
    wanted = file_path.replace("\\", "/")
    # A suffix only counts at a directory boundary; a plain endswith() made
    # "a.cpp" match "data.cpp" as well.
    wanted_tail = "/" + wanted.lstrip("/")

    results = []
    for item in all_classes + all_functions:
        item_file = item.get('file')
        if not item_file:
            # No recorded location (None or ""): cannot belong to any file.
            # Previously a None value crashed on ``.endswith``.
            continue

        if str(Path(item_file).resolve()) == abs_file_path:
            results.append(item)
            continue

        normalized = item_file.replace("\\", "/")
        if normalized == wanted or normalized.endswith(wanted_tail):
            results.append(item)

    return results
1018
+
1019
+
1020
+ # Create factory function for compatibility
1021
def create_analyzer(project_root: str) -> CppAnalyzer:
    """Build and return a ``CppAnalyzer`` for ``project_root``.

    Module-level factory kept for compatibility; it simply forwards
    ``project_root`` to the ``CppAnalyzer`` constructor.
    """
    analyzer = CppAnalyzer(project_root)
    return analyzer
1024
+
1025
+
1026
+ # Test function
1027
if __name__ == "__main__":
    # Manual smoke test: analyze the current directory and print summary counts.
    print("Testing Python CppAnalyzer...")
    analyzer = CppAnalyzer(".")

    # Index the project only when no cache could be loaded.
    cache_loaded = analyzer._load_cache()
    if not cache_loaded:
        analyzer.index_project()

    stats = analyzer.get_stats()
    print(f"Stats: {stats}")

    # ".*" matches every symbol name; project_only=True restricts the results
    # to project symbols.
    classes = analyzer.search_classes(".*", project_only=True)
    print(f"Found {len(classes)} project classes")

    functions = analyzer.search_functions(".*", project_only=True)
    print(f"Found {len(functions)} project functions")