codegraphcontext 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. codegraphcontext/cli/cli_helpers.py +92 -10
  2. codegraphcontext/cli/config_manager.py +42 -3
  3. codegraphcontext/cli/main.py +397 -45
  4. codegraphcontext/cli/setup_wizard.py +104 -4
  5. codegraphcontext/core/__init__.py +3 -3
  6. codegraphcontext/core/database.py +117 -44
  7. codegraphcontext/core/database_kuzu.py +351 -30
  8. codegraphcontext/core/watcher.py +42 -0
  9. codegraphcontext/server.py +30 -0
  10. codegraphcontext/tool_definitions.py +59 -0
  11. codegraphcontext/tools/code_finder.py +172 -28
  12. codegraphcontext/tools/datasources/__init__.py +1 -0
  13. codegraphcontext/tools/datasources/cassandra_ingester.py +99 -0
  14. codegraphcontext/tools/datasources/mysql_ingester.py +120 -0
  15. codegraphcontext/tools/datasources/redis_ingester.py +122 -0
  16. codegraphcontext/tools/graph_builder.py +760 -6
  17. codegraphcontext/tools/handlers/analysis_handlers.py +173 -0
  18. codegraphcontext/tools/handlers/indexing_handlers.py +46 -1
  19. codegraphcontext/tools/handlers/query_handlers.py +11 -2
  20. codegraphcontext/tools/indexing/discovery.py +26 -2
  21. codegraphcontext/tools/indexing/embeddings.py +266 -0
  22. codegraphcontext/tools/indexing/persistence/writer.py +567 -85
  23. codegraphcontext/tools/indexing/pipeline.py +119 -3
  24. codegraphcontext/tools/indexing/resolution/calls.py +2260 -70
  25. codegraphcontext/tools/indexing/resolution/inheritance.py +27 -15
  26. codegraphcontext/tools/indexing/resolution/post_resolution.py +205 -0
  27. codegraphcontext/tools/indexing/schema.py +22 -0
  28. codegraphcontext/tools/indexing/schema_contract.py +24 -0
  29. codegraphcontext/tools/indexing/scip_pipeline.py +69 -5
  30. codegraphcontext/tools/indexing/vector_resolver.py +137 -0
  31. codegraphcontext/tools/languages/csharp.py +5 -0
  32. codegraphcontext/tools/languages/go.py +6 -5
  33. codegraphcontext/tools/languages/gradle.py +115 -0
  34. codegraphcontext/tools/languages/java.py +641 -12
  35. codegraphcontext/tools/languages/kotlin.py +1653 -135
  36. codegraphcontext/tools/languages/maven.py +165 -0
  37. codegraphcontext/tools/languages/mybatis.py +185 -0
  38. codegraphcontext/tools/languages/php.py +18 -14
  39. codegraphcontext/tools/query_tool_languages/java_toolkit.py +250 -3
  40. codegraphcontext/tools/report_generator.py +368 -0
  41. codegraphcontext/tools/scip_indexer.py +547 -264
  42. codegraphcontext/tools/system.py +6 -2
  43. codegraphcontext/tools/type_utils.py +12 -0
  44. codegraphcontext/utils/tree_sitter_manager.py +86 -35
  45. codegraphcontext/viz/server.py +37 -3
  46. {codegraphcontext-0.4.6.dist-info → codegraphcontext-0.4.8.dist-info}/METADATA +50 -88
  47. {codegraphcontext-0.4.6.dist-info → codegraphcontext-0.4.8.dist-info}/RECORD +51 -39
  48. {codegraphcontext-0.4.6.dist-info → codegraphcontext-0.4.8.dist-info}/WHEEL +0 -0
  49. {codegraphcontext-0.4.6.dist-info → codegraphcontext-0.4.8.dist-info}/entry_points.txt +0 -0
  50. {codegraphcontext-0.4.6.dist-info → codegraphcontext-0.4.8.dist-info}/licenses/LICENSE +0 -0
  51. {codegraphcontext-0.4.6.dist-info → codegraphcontext-0.4.8.dist-info}/top_level.txt +0 -0
@@ -2,10 +2,12 @@ import asyncio
2
2
  import json
3
3
  import uuid
4
4
  import urllib.parse
5
+ from collections import Counter
5
6
  from pathlib import Path
6
7
  import time
7
8
  import os
8
9
  from typing import Optional, List, Dict, Any
10
+ import typer
9
11
  from rich.console import Console
10
12
  from rich.table import Table
11
13
  from rich.progress import (
@@ -24,12 +26,38 @@ from ..tools.code_finder import CodeFinder
24
26
  from ..tools.graph_builder import GraphBuilder
25
27
  from ..tools.package_resolver import get_local_package_path
26
28
  from ..utils.debug_log import info_logger, warning_logger
29
+ from ..core.database import Neo4jConnectionError
27
30
  from ..utils.repo_path import any_repo_matches_path
28
31
  from .config_manager import resolve_context, ResolvedContext, register_repo_in_context, ensure_first_run_bootstrap
29
32
 
30
33
  console = Console()
31
34
 
32
35
 
36
+ def _print_call_resolution_diagnostics(graph_builder: GraphBuilder, limit: int = 5) -> None:
37
+ diagnostics = getattr(graph_builder, "last_call_resolution_diagnostics", [])
38
+ if not diagnostics:
39
+ return
40
+
41
+ reason_counts = Counter(d.get("reason", "unknown") for d in diagnostics)
42
+ summary = ", ".join(
43
+ f"{reason}={count}" for reason, count in reason_counts.most_common()
44
+ )
45
+ console.print(
46
+ f"[yellow]Skipped {len(diagnostics)} unresolved call relationship(s): {summary}[/yellow]"
47
+ )
48
+ table = Table(show_header=True, header_style="bold magenta")
49
+ table.add_column("Call", style="cyan", overflow="fold")
50
+ table.add_column("Reason", style="yellow")
51
+ table.add_column("Location", style="dim", overflow="fold")
52
+ for diagnostic in diagnostics[:limit]:
53
+ table.add_row(
54
+ str(diagnostic.get("full_call_name") or ""),
55
+ str(diagnostic.get("reason") or ""),
56
+ f"{diagnostic.get('caller_file_path')}:{diagnostic.get('line_number')}",
57
+ )
58
+ console.print(table)
59
+
60
+
33
61
  def _initialize_services(cli_context_flag: Optional[str] = None) -> tuple[Any, Any, Any, ResolvedContext]:
34
62
  """
35
63
  Initializes and returns core service managers based on the resolved context.
@@ -50,9 +78,14 @@ def _initialize_services(cli_context_flag: Optional[str] = None) -> tuple[Any, A
50
78
 
51
79
  console.print("[dim]Initializing services and database connection...[/dim]")
52
80
  try:
53
- # Override the database backend with the context's specific choice
54
- if ctx.database:
55
- os.environ['CGC_RUNTIME_DB_TYPE'] = ctx.database
81
+ # Respect runtime/backend overrides. Context DB is only a default when
82
+ # neither runtime override nor DEFAULT_DATABASE is already set.
83
+ if (
84
+ ctx.database
85
+ and not os.getenv("CGC_RUNTIME_DB_TYPE")
86
+ and not os.getenv("DEFAULT_DATABASE")
87
+ ):
88
+ os.environ["DEFAULT_DATABASE"] = ctx.database
56
89
 
57
90
  # Pass the exact DB path resolved from the context
58
91
  db_manager = get_database_manager(db_path=ctx.db_path)
@@ -85,9 +118,35 @@ def _initialize_services(cli_context_flag: Optional[str] = None) -> tuple[Any, A
85
118
  console.print(f"[bold red]Critical Error:[/bold red] Both FalkorDB and KùzuDB failed: {kuzu_e}")
86
119
  return None, None, None, ctx
87
120
  else:
88
- console.print(f"[bold red]Database Connection Error:[/bold red] {e}")
89
- console.print("Please ensure your database is configured correctly or run 'cgc doctor'.")
90
- return None, None, None, ctx
121
+ selected_db = (
122
+ os.environ.get("CGC_RUNTIME_DB_TYPE")
123
+ or os.environ.get("DATABASE_TYPE")
124
+ or os.environ.get("DEFAULT_DATABASE")
125
+ or ""
126
+ ).lower()
127
+
128
+ if isinstance(e, Neo4jConnectionError):
129
+ console.print(f"[bold red]{e}[/bold red]")
130
+ allow_fallback = os.environ.get("CGC_ALLOW_NEO4J_FALLBACK", "false").lower() in {"1", "true", "yes", "on"}
131
+
132
+ if selected_db == "neo4j" and allow_fallback:
133
+ console.print("[cyan]Neo4j failed and CGC_ALLOW_NEO4J_FALLBACK=true. Falling back to KuzuDB...[/cyan]")
134
+ try:
135
+ from ..core.database_kuzu import KuzuDBManager
136
+ db_manager = KuzuDBManager()
137
+ db_manager.get_driver()
138
+ console.print("[green]✓[/green] Successfully switched to KuzuDB fallback")
139
+ except Exception as kuzu_e:
140
+ console.print(f"[bold red]Critical Error:[/bold red] Neo4j failed and KuzuDB fallback failed: {kuzu_e}")
141
+ return None, None, None, ctx
142
+ else:
143
+ if selected_db == "neo4j":
144
+ console.print("[yellow]Tip:[/yellow] To continue without Neo4j, rerun with --db kuzudb")
145
+ return None, None, None, ctx
146
+ else:
147
+ console.print(f"[bold red]Database Connection Error:[/bold red] {e}")
148
+ console.print("Please ensure your database is configured correctly or run 'cgc doctor'.")
149
+ return None, None, None, ctx
91
150
 
92
151
  # The GraphBuilder requires an event loop, even for synchronous-style execution
93
152
  try:
@@ -211,6 +270,7 @@ def index_helper(path: str, context: Optional[str] = None):
211
270
  asyncio.run(_run_index_with_progress(graph_builder, path_obj, is_dependency=False, cgcignore_path=ctx.cgcignore_path))
212
271
  time_end = time.time()
213
272
  elapsed = time_end - time_start
273
+ _print_call_resolution_diagnostics(graph_builder)
214
274
  console.print(f"[green]Successfully finished indexing: {path} in {elapsed:.2f} seconds[/green]")
215
275
 
216
276
  # Check if auto-watch is enabled
@@ -227,6 +287,7 @@ def index_helper(path: str, context: Optional[str] = None):
227
287
 
228
288
  except Exception as e:
229
289
  console.print(f"[bold red]An error occurred during indexing:[/bold red] {e}")
290
+ raise typer.Exit(code=1)
230
291
  finally:
231
292
  db_manager.close_driver()
232
293
 
@@ -257,6 +318,7 @@ def add_package_helper(package_name: str, language: str, context: Optional[str]
257
318
 
258
319
  try:
259
320
  asyncio.run(_run_index_with_progress(graph_builder, package_path, is_dependency=True, cgcignore_path=ctx.cgcignore_path))
321
+ _print_call_resolution_diagnostics(graph_builder)
260
322
  console.print(f"[green]Successfully finished indexing package: {package_name}[/green]")
261
323
  except Exception as e:
262
324
  console.print(f"[bold red]An error occurred during package indexing:[/bold red] {e}")
@@ -322,9 +384,11 @@ def cypher_helper(query: str, context: Optional[str] = None):
322
384
 
323
385
  db_manager, _, _, ctx = services
324
386
 
325
- # Replicating safety checks from MCPServer
387
+ # Replicating safety checks from MCPServer (using word boundaries to avoid false positives like 'createEmail')
388
+ import re
326
389
  forbidden_keywords = ['CREATE', 'MERGE', 'DELETE', 'SET', 'REMOVE', 'DROP', 'CALL apoc']
327
- if any(keyword in query.upper() for keyword in forbidden_keywords):
390
+ pattern = r'\b(' + '|'.join(forbidden_keywords) + r')\b'
391
+ if re.search(pattern, query, re.IGNORECASE):
328
392
  console.print("[bold red]Error: This command only supports read-only queries.[/bold red]")
329
393
  db_manager.close_driver()
330
394
  return
@@ -350,9 +414,11 @@ def cypher_helper_visual(query: str, context: Optional[str] = None):
350
414
 
351
415
  db_manager, _, _, ctx = services
352
416
 
353
- # Replicating safety checks from MCPServer
417
+ # Replicating safety checks from MCPServer (using word boundaries to avoid false positives like 'createEmail')
418
+ import re
354
419
  forbidden_keywords = ['CREATE', 'MERGE', 'DELETE', 'SET', 'REMOVE', 'DROP', 'CALL apoc']
355
- if any(keyword in query.upper() for keyword in forbidden_keywords):
420
+ pattern = r'\b(' + '|'.join(forbidden_keywords) + r')\b'
421
+ if re.search(pattern, query, re.IGNORECASE):
356
422
  console.print("[bold red]Error: This command only supports read-only queries.[/bold red]")
357
423
  db_manager.close_driver()
358
424
  return
@@ -508,9 +574,11 @@ def reindex_helper(path: str, context: Optional[str] = None):
508
574
  asyncio.run(_run_index_with_progress(graph_builder, path_obj, is_dependency=False, cgcignore_path=ctx.cgcignore_path))
509
575
  time_end = time.time()
510
576
  elapsed = time_end - time_start
577
+ _print_call_resolution_diagnostics(graph_builder)
511
578
  console.print(f"[green]Successfully re-indexed: {path} in {elapsed:.2f} seconds[/green]")
512
579
  except Exception as e:
513
580
  console.print(f"[bold red]An error occurred during re-indexing:[/bold red] {e}")
581
+ raise typer.Exit(code=1)
514
582
  finally:
515
583
  db_manager.close_driver()
516
584
 
@@ -637,6 +705,12 @@ def stats_helper(path: str = None, context: Optional[str] = None):
637
705
  class_count = session.run("MATCH (c:Class) RETURN count(c) as c").single()["c"]
638
706
  module_count = session.run("MATCH (m:Module) RETURN count(m) as c").single()["c"]
639
707
 
708
+ # Extended node types (PHP, Rust, Go, etc.)
709
+ interface_count = session.run("MATCH (i:Interface) RETURN count(i) as c").single()["c"]
710
+ trait_count = session.run("MATCH (t:Trait) RETURN count(t) as c").single()["c"]
711
+ struct_count = session.run("MATCH (s:Struct) RETURN count(s) as c").single()["c"]
712
+ enum_count = session.run("MATCH (e:Enum) RETURN count(e) as c").single()["c"]
713
+
640
714
  table = Table(show_header=True, header_style="bold magenta")
641
715
  table.add_column("Metric", style="cyan")
642
716
  table.add_column("Count", style="green", justify="right")
@@ -645,6 +719,14 @@ def stats_helper(path: str = None, context: Optional[str] = None):
645
719
  table.add_row("Files", str(file_count))
646
720
  table.add_row("Functions", str(func_count))
647
721
  table.add_row("Classes", str(class_count))
722
+ if interface_count > 0:
723
+ table.add_row("Interfaces", str(interface_count))
724
+ if trait_count > 0:
725
+ table.add_row("Traits", str(trait_count))
726
+ if struct_count > 0:
727
+ table.add_row("Structs", str(struct_count))
728
+ if enum_count > 0:
729
+ table.add_row("Enums", str(enum_count))
648
730
  table.add_row("Modules", str(module_count))
649
731
 
650
732
  console.print(table)
@@ -48,13 +48,18 @@ DEFAULT_CONFIG = {
48
48
  "INDEX_SOURCE": "true",
49
49
  # SCIP indexer feature flag (default off — existing Tree-sitter behaviour unchanged)
50
50
  "SCIP_INDEXER": "false",
51
- "SCIP_LANGUAGES": "python,typescript,go,rust,java",
51
+ "SCIP_LANGUAGES": "python,typescript,javascript,go,rust,java,dart,cpp,c,csharp",
52
52
  "SKIP_EXTERNAL_RESOLUTION": "false",
53
53
  # 0 = unlimited; any positive integer caps MCP tool response size.
54
54
  "MAX_TOOL_RESPONSE_TOKENS": "0",
55
55
  # JSON object mapping tool names to integer result-count limits.
56
56
  # Example: {"find_code": 20, "analyze_code_relationships": 10, "find_dead_code": 30}
57
57
  "TOOL_RESULT_LIMITS": "{}",
58
+ # Post-indexing resolution phases (default off)
59
+ "ENABLE_INHERIT_RESOLVE": "false",
60
+ "ENABLE_VECTOR_RESOLVE": "false",
61
+ "CGC_EMBEDDING_MODEL": "local",
62
+ "CGC_EMBEDDING_BATCH_SIZE": "256",
58
63
  }
59
64
 
60
65
  # Configuration key descriptions
@@ -80,10 +85,41 @@ CONFIG_DESCRIPTIONS = {
80
85
  "IGNORE_DIRS": "Comma-separated list of directory names to ignore during indexing",
81
86
  "INDEX_SOURCE": "Store full source code in graph database (for faster indexing use false, for better performance use true)",
82
87
  "SCIP_INDEXER": "Use SCIP-based indexing for higher accuracy call/inheritance resolution (requires scip-<lang> tools installed)",
83
- "SCIP_LANGUAGES": "Comma-separated languages to index via SCIP when SCIP_INDEXER=true (python,typescript,go,rust,java)",
88
+ "SCIP_LANGUAGES": "Comma-separated languages to index via SCIP when SCIP_INDEXER=true (python,typescript,javascript,go,rust,java,dart,cpp,c,csharp)",
84
89
  "SKIP_EXTERNAL_RESOLUTION": "Skip resolution attempts for external library method calls (recommended for enterprise large Java/Spring codebases)",
85
90
  "MAX_TOOL_RESPONSE_TOKENS": "Maximum tokens per MCP tool response (0 = unlimited). Truncates oversized payloads and appends a notice.",
86
91
  "TOOL_RESULT_LIMITS": "JSON object mapping tool names to max result counts, e.g. {\"find_code\": 20, \"analyze_code_relationships\": 10}. Missing keys use built-in defaults.",
92
+ # Post-indexing resolution phases
93
+ "ENABLE_INHERIT_RESOLVE": (
94
+ "[Phase 5] Re-resolve ambiguous same-file CALLS edges using the inheritance graph (INHERITS relationships). "
95
+ "When enabled, methods called on an interface or abstract class are re-pointed to the correct concrete "
96
+ "implementation based on the class hierarchy, reducing tier-7 fallback edges. "
97
+ "WHEN TO ENABLE: any Java/Kotlin/C# codebase that uses inheritance or interface-based DI (e.g. Spring, OSGi). "
98
+ "PREREQUISITES: run 'cgc index' first so INHERITS edges exist in the graph. No extra tools needed. "
99
+ "COST: adds ~1-5 min per 50K functions at the end of each 'cgc index' run. Safe to toggle on/off — only adds new edges, never removes existing ones."
100
+ ),
101
+ "ENABLE_VECTOR_RESOLVE": (
102
+ "[Phase 4 + Phase 5 tiebreaker] Generate semantic embeddings for all Function nodes and use vector "
103
+ "similarity as a tiebreaker when inheritance resolution alone cannot distinguish between multiple candidates. "
104
+ "Phase 4 writes a 384-dim embedding to every Function node; Phase 5 queries those embeddings during re-resolution. "
105
+ "WHEN TO ENABLE: large codebases (>10K functions) where inheritance alone leaves many ambiguous calls "
106
+ "(tier-7 fallbacks still high after ENABLE_INHERIT_RESOLVE). Also useful for cross-language repos. "
107
+ "PREREQUISITES: (1) fastembed must be installed — run 'pip install fastembed'. "
108
+ "(2) Neo4j must be the active database (vector index not supported on FalkorDB/KuzuDB). "
109
+ "(3) ENABLE_INHERIT_RESOLVE should also be true — vector is a tiebreaker for Phase 5, not a replacement. "
110
+ "COST: Phase 4 takes ~15 min per 50K functions on CPU (first run only; incremental updates are fast). "
111
+ "Embedding model (~40 MB) is downloaded automatically on first use from HuggingFace."
112
+ ),
113
+ "CGC_EMBEDDING_MODEL": (
114
+ "Embedding backend for ENABLE_VECTOR_RESOLVE. "
115
+ "'local' uses fastembed (BAAI/bge-small-en-v1.5, 384-dim, runs on CPU, no GPU or API key needed). "
116
+ "'openai' uses OpenAI text-embedding-3-small (requires OPENAI_API_KEY env var, costs money per token). "
117
+ "Default: local"
118
+ ),
119
+ "CGC_EMBEDDING_BATCH_SIZE": (
120
+ "Number of function texts to embed per batch when ENABLE_VECTOR_RESOLVE=true. "
121
+ "Larger values are faster but use more RAM. Default: 256. Reduce to 64 if you hit memory errors."
122
+ ),
87
123
  }
88
124
 
89
125
  # Valid values for each config key
@@ -101,6 +137,9 @@ CONFIG_VALIDATORS = {
101
137
  "INDEX_SOURCE": ["true", "false"],
102
138
  "SCIP_INDEXER": ["true", "false"],
103
139
  "SKIP_EXTERNAL_RESOLUTION": ["true", "false"],
140
+ "ENABLE_INHERIT_RESOLVE": ["true", "false"],
141
+ "ENABLE_VECTOR_RESOLVE": ["true", "false"],
142
+ "CGC_EMBEDDING_MODEL": ["local", "openai"],
104
143
  }
105
144
  DEFAULT_CGCIGNORE_PATTERNS = """\
106
145
  # Default .cgcignore patterns
@@ -820,7 +859,7 @@ def resolve_context(
820
859
  )
821
860
 
822
861
  # --- 4. Global fallback ---
823
- db = load_config().get("DEFAULT_DATABASE", "falkordb")
862
+ db = os.getenv("CGC_RUNTIME_DB_TYPE") or load_config().get("DEFAULT_DATABASE", "falkordb")
824
863
  return ResolvedContext(
825
864
  mode="global",
826
865
  context_name="",