codegraphcontext 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. codegraphcontext/cli/config_manager.py +12 -3
  2. codegraphcontext/cli/main.py +132 -72
  3. codegraphcontext/cli/registry_commands.py +20 -8
  4. codegraphcontext/core/__init__.py +9 -2
  5. codegraphcontext/core/bundle_registry.py +7 -5
  6. codegraphcontext/core/cgc_bundle.py +67 -7
  7. codegraphcontext/tools/code_finder.py +17 -0
  8. codegraphcontext/tools/indexing/resolution/calls.py +43 -6
  9. codegraphcontext/utils/git_utils.py +13 -0
  10. codegraphcontext/viz/dist/assets/{index-BJT3EMmQ.js → index-C-187lf0.js} +363 -347
  11. codegraphcontext/viz/dist/assets/index-fNAa6jgv.css +1 -0
  12. codegraphcontext/viz/dist/assets/parser-pyodide.worker-BgsDfaad.js +370 -0
  13. codegraphcontext/viz/dist/assets/{parser.worker-CZgm11E5.js → parser.worker-_nvrecvj.js} +35 -10
  14. codegraphcontext/viz/dist/cgcIcon.png +0 -0
  15. codegraphcontext/viz/dist/index.html +3 -2
  16. codegraphcontext/viz/dist/logo-icon.svg +85 -0
  17. codegraphcontext/viz/dist/logo.svg +100 -0
  18. {codegraphcontext-0.4.10.dist-info → codegraphcontext-0.4.12.dist-info}/METADATA +5 -5
  19. {codegraphcontext-0.4.10.dist-info → codegraphcontext-0.4.12.dist-info}/RECORD +23 -19
  20. codegraphcontext/viz/dist/assets/index-DjDPHWki.css +0 -1
  21. {codegraphcontext-0.4.10.dist-info → codegraphcontext-0.4.12.dist-info}/WHEEL +0 -0
  22. {codegraphcontext-0.4.10.dist-info → codegraphcontext-0.4.12.dist-info}/entry_points.txt +0 -0
  23. {codegraphcontext-0.4.10.dist-info → codegraphcontext-0.4.12.dist-info}/licenses/LICENSE +0 -0
  24. {codegraphcontext-0.4.10.dist-info → codegraphcontext-0.4.12.dist-info}/top_level.txt +0 -0
@@ -30,6 +30,7 @@ DEFAULT_CONFIG = {
30
30
  "DEFAULT_DATABASE": "falkordb",
31
31
  "FALKORDB_PATH": str(CONFIG_DIR / "global" / "db" / "falkordb"),
32
32
  "FALKORDB_SOCKET_PATH": str(CONFIG_DIR / "global" / "db" / "falkordb.sock"),
33
+ "LADYBUGDB_PATH": str(CONFIG_DIR / "global" / "db" / "ladybugdb"),
33
34
  "INDEX_VARIABLES": "true",
34
35
  "ALLOW_DB_DELETION": "false",
35
36
  "DEBUG_LOGS": "false",
@@ -61,13 +62,16 @@ DEFAULT_CONFIG = {
61
62
  "ENABLE_VECTOR_RESOLVE": "false",
62
63
  "CGC_EMBEDDING_MODEL": "local",
63
64
  "CGC_EMBEDDING_BATCH_SIZE": "256",
65
+ # Default fuzzy matching behavior for `cgc find name` (overridable per-command with --fuzzy/--no-fuzzy)
66
+ "FUZZY_SEARCH": "true",
64
67
  }
65
68
 
66
69
  # Configuration key descriptions
67
70
  CONFIG_DESCRIPTIONS = {
68
- "DEFAULT_DATABASE": "Default database backend (neo4j|falkordb|kuzudb|nornic)",
71
+ "DEFAULT_DATABASE": "Default database backend (neo4j|falkordb|falkordb-remote|kuzudb|nornic|ladybugdb)",
69
72
  "FALKORDB_PATH": "Path to FalkorDB database file",
70
73
  "FALKORDB_SOCKET_PATH": "Path to FalkorDB Unix socket",
74
+ "LADYBUGDB_PATH": "Path to LadybugDB database directory",
71
75
  "INDEX_VARIABLES": "Index variable nodes in the graph (lighter graph if false)",
72
76
  "ALLOW_DB_DELETION": "Allow full database deletion commands",
73
77
  "DEBUG_LOGS": "Enable debug logging (for development/troubleshooting)",
@@ -121,11 +125,15 @@ CONFIG_DESCRIPTIONS = {
121
125
  "Number of function texts to embed per batch when ENABLE_VECTOR_RESOLVE=true. "
122
126
  "Larger values are faster but use more RAM. Default: 256. Reduce to 64 if you hit memory errors."
123
127
  ),
128
+ "FUZZY_SEARCH": (
129
+ "Enable fuzzy matching by default for `cgc find name` (true|false). "
130
+ "Per-invocation overrides are available via --fuzzy / --no-fuzzy."
131
+ ),
124
132
  }
125
133
 
126
134
  # Valid values for each config key
127
135
  CONFIG_VALIDATORS = {
128
- "DEFAULT_DATABASE": ["neo4j", "falkordb", "falkordb-remote", "kuzudb", "nornic"],
136
+ "DEFAULT_DATABASE": ["neo4j", "falkordb", "falkordb-remote", "kuzudb", "nornic", "ladybugdb"],
129
137
  "INDEX_VARIABLES": ["true", "false"],
130
138
  "ALLOW_DB_DELETION": ["true", "false"],
131
139
  "DEBUG_LOGS": ["true", "false"],
@@ -141,6 +149,7 @@ CONFIG_VALIDATORS = {
141
149
  "ENABLE_INHERIT_RESOLVE": ["true", "false"],
142
150
  "ENABLE_VECTOR_RESOLVE": ["true", "false"],
143
151
  "CGC_EMBEDDING_MODEL": ["local", "openai"],
152
+ "FUZZY_SEARCH": ["true", "false"],
144
153
  }
145
154
  DEFAULT_CGCIGNORE_PATTERNS = """\
146
155
  # Default .cgcignore patterns
@@ -445,7 +454,7 @@ def validate_config_value(key: str, value: str) -> tuple[bool, Optional[str]]:
445
454
  except Exception as e:
446
455
  return False, f"Cannot create log directory: {e}"
447
456
 
448
- if key in ("FALKORDB_PATH", "FALKORDB_SOCKET_PATH"):
457
+ if key in ("FALKORDB_PATH", "FALKORDB_SOCKET_PATH", "LADYBUGDB_PATH"):
449
458
  # Validate path is writable
450
459
  db_path = Path(normalize_config_path(value, absolute=True))
451
460
  try:
@@ -285,7 +285,7 @@ def context_default(
285
285
  # CREDENTIALS LOADING PRECEDENCE
286
286
  # ============================================================================
287
287
 
288
- def _load_credentials():
288
+ def _load_credentials(cli_context_flag: Optional[str] = None):
289
289
  """
290
290
  Loads configuration and credentials from various sources into environment variables.
291
291
  Uses per-variable precedence - each variable is loaded from the highest priority source.
@@ -336,7 +336,7 @@ def _load_credentials():
336
336
  mcp_file_path = Path.cwd() / "mcp.json"
337
337
  if mcp_file_path.exists():
338
338
  try:
339
- with open(mcp_file_path, "r") as f:
339
+ with open(mcp_file_path, "r", encoding="utf-8", errors="replace") as f:
340
340
  mcp_config = json.load(f)
341
341
  server_env = mcp_config.get("mcpServers", {}).get("CodeGraphContext", {}).get("env", {})
342
342
  if isinstance(server_env, dict):
@@ -355,7 +355,8 @@ def _load_credentials():
355
355
  global_env_path = Path.home() / ".codegraphcontext" / ".env"
356
356
  if global_env_path.exists():
357
357
  try:
358
- _append_source(str(global_env_path), dotenv_values(str(global_env_path)))
358
+ with open(global_env_path, "r", encoding="utf-8", errors="replace") as f:
359
+ _append_source(str(global_env_path), dotenv_values(stream=f))
359
360
  except Exception as e:
360
361
  console.print(f"[yellow]Warning: Could not load global .env: {e}[/yellow]")
361
362
 
@@ -363,7 +364,8 @@ def _load_credentials():
363
364
  try:
364
365
  dotenv_path = find_dotenv(usecwd=True, raise_error_if_not_found=False)
365
366
  if dotenv_path:
366
- _append_source(str(dotenv_path), dotenv_values(dotenv_path))
367
+ with open(dotenv_path, "r", encoding="utf-8", errors="replace") as f:
368
+ _append_source(str(dotenv_path), dotenv_values(stream=f))
367
369
  except Exception as e:
368
370
  console.print(f"[yellow]Warning: Could not load .env from current directory: {e}[/yellow]")
369
371
 
@@ -371,8 +373,9 @@ def _load_credentials():
371
373
  try:
372
374
  local_cgc_env = codegraphcontext_dotenv_at_cwd(Path.cwd())
373
375
  if local_cgc_env and local_cgc_env.resolve() != global_env_path.resolve():
374
- config_sources.append(dotenv_values(str(local_cgc_env)))
375
- config_source_names.append(str(local_cgc_env))
376
+ with open(local_cgc_env, "r", encoding="utf-8", errors="replace") as f:
377
+ vals = dotenv_values(stream=f)
378
+ _append_source(str(local_cgc_env), vals)
376
379
  except Exception as e:
377
380
  console.print(
378
381
  f"[yellow]Warning: Could not load .codegraphcontext/.env at cwd: {e}[/yellow]"
@@ -418,31 +421,62 @@ def _load_credentials():
418
421
  )
419
422
 
420
423
 
421
- # Show which database is actually being used.
422
- # When CGC_RUNTIME_DB_TYPE or DEFAULT_DATABASE is set, trust it. Otherwise
423
- # call get_database_manager() so the banner matches factory fallbacks.
424
- runtime_db = os.environ.get("CGC_RUNTIME_DB_TYPE")
425
- database_type = os.environ.get("DATABASE_TYPE")
426
- default_database = os.environ.get("DEFAULT_DATABASE")
424
+ # Detect the context to see if it specifies a custom database
425
+ if cli_context_flag is None:
426
+ import sys
427
+ for i, arg in enumerate(sys.argv):
428
+ if arg in ("--context", "-c"):
429
+ if i + 1 < len(sys.argv):
430
+ cli_context_flag = sys.argv[i + 1]
431
+ break
432
+ elif arg.startswith("--context="):
433
+ cli_context_flag = arg.split("=", 1)[1]
434
+ break
435
+
436
+ from codegraphcontext.cli.config_manager import resolve_context
437
+ ctx = None
438
+ try:
439
+ ctx = resolve_context(cli_context_flag)
440
+ except Exception:
441
+ pass
427
442
 
428
- explicit_db = runtime_db or database_type or default_database
443
+ # Determine if there is a runtime database override.
444
+ runtime_db = os.environ.get("CGC_RUNTIME_DB_TYPE")
445
+ has_runtime_override = (
446
+ runtime_db is not None
447
+ or "DATABASE_TYPE" in runtime_env
448
+ or "DEFAULT_DATABASE" in runtime_env
449
+ )
429
450
 
451
+ # If there is no runtime override, but the context defines a database,
452
+ # set DEFAULT_DATABASE to the context database to ensure that's what gets initialized.
453
+ if not has_runtime_override and ctx and ctx.mode != "global" and ctx.database:
454
+ os.environ["DEFAULT_DATABASE"] = ctx.database
455
+
456
+ # Now select the database based on precedence:
457
+ # 1. CGC_RUNTIME_DB_TYPE
458
+ # 2. DATABASE_TYPE or DEFAULT_DATABASE from runtime environment (shell variables)
459
+ # 3. Context database
460
+ # 4. DATABASE_TYPE or DEFAULT_DATABASE from merged config files (.env files)
461
+ # 5. Auto-detect fallback
430
462
  if runtime_db:
463
+ default_db = runtime_db.lower()
431
464
  db_source = "runtime-env (CGC_RUNTIME_DB_TYPE)"
432
465
  elif "DATABASE_TYPE" in runtime_env:
466
+ default_db = runtime_env["DATABASE_TYPE"].lower()
433
467
  db_source = "environment (DATABASE_TYPE)"
434
468
  elif "DEFAULT_DATABASE" in runtime_env:
469
+ default_db = runtime_env["DEFAULT_DATABASE"].lower()
435
470
  db_source = "environment (DEFAULT_DATABASE)"
436
- elif database_type and "DATABASE_TYPE" in key_source_map:
471
+ elif not has_runtime_override and ctx and ctx.mode != "global" and ctx.database:
472
+ default_db = ctx.database.lower()
473
+ db_source = f"context ({ctx.context_name or 'resolved'})"
474
+ elif os.environ.get("DATABASE_TYPE") and "DATABASE_TYPE" in key_source_map:
475
+ default_db = os.environ["DATABASE_TYPE"].lower()
437
476
  db_source = key_source_map["DATABASE_TYPE"]
438
- elif default_database and "DEFAULT_DATABASE" in key_source_map:
477
+ elif os.environ.get("DEFAULT_DATABASE") and "DEFAULT_DATABASE" in key_source_map:
478
+ default_db = os.environ["DEFAULT_DATABASE"].lower()
439
479
  db_source = key_source_map["DEFAULT_DATABASE"]
440
- else:
441
- db_source = "auto-detect"
442
- explicit_db = runtime_db or os.environ.get("DEFAULT_DATABASE")
443
-
444
- if explicit_db:
445
- default_db = explicit_db.lower()
446
480
  else:
447
481
  # No explicit choice — ask the factory which backend it will use
448
482
  try:
@@ -455,6 +489,7 @@ def _load_credentials():
455
489
  default_db = "falkordb" if _is_falkordb_available() else "kuzudb"
456
490
  db_source = "auto-detect"
457
491
 
492
+ # Print selection banner
458
493
  if default_db == "neo4j":
459
494
  has_neo4j_creds = all([
460
495
  os.environ.get("NEO4J_URI"),
@@ -473,20 +508,14 @@ def _load_credentials():
473
508
  console.print(f"[cyan]Using database: falkordb (source: {db_source})[/cyan]")
474
509
  elif default_db == "kuzudb":
475
510
  console.print(f"[cyan]Using database: kuzudb (source: {db_source})[/cyan]")
511
+ elif default_db == "ladybugdb":
512
+ console.print(f"[cyan]Using database: ladybugdb (source: {db_source})[/cyan]")
476
513
  elif default_db == "falkordb-remote":
477
514
  host = os.environ.get("FALKORDB_HOST")
478
515
  if host:
479
516
  console.print(f"[cyan]Using database: falkordb-remote (source: {db_source}, host: {host})[/cyan]")
480
517
  else:
481
518
  console.print("[yellow]⚠ DATABASE_TYPE=falkordb-remote but FALKORDB_HOST not set.[/yellow]")
482
- elif default_db == "falkordb":
483
- if os.environ.get("FALKORDB_HOST"):
484
- console.print(f"[cyan]Using database: falkordb-remote (source: {db_source}, host: {os.environ.get('FALKORDB_HOST')})[/cyan]")
485
- else:
486
- console.print(f"[cyan]Using database: falkordb (source: {db_source})[/cyan]")
487
- console.print(
488
- "[yellow]⚠ DEFAULT_DATABASE=falkordb-remote but FALKORDB_HOST not set.[/yellow]"
489
- )
490
519
  else:
491
520
  console.print(f"[cyan]Using database: {default_db} (source: {db_source})[/cyan]")
492
521
 
@@ -543,7 +572,7 @@ def config_reset():
543
572
  console.print("[yellow]Reset cancelled[/yellow]")
544
573
 
545
574
  @config_app.command("db")
546
- def config_db(backend: str = typer.Argument(..., help="Database backend: 'neo4j', 'falkordb', 'falkordb-remote', or 'kuzudb'")):
575
+ def config_db(backend: str = typer.Argument(..., help="Database backend: 'neo4j', 'falkordb', 'falkordb-remote', 'kuzudb', or 'ladybugdb'")):
547
576
  """
548
577
  Quickly switch the default database backend.
549
578
 
@@ -1388,17 +1417,22 @@ app.add_typer(find_app, name="find")
1388
1417
  @find_app.command("name")
1389
1418
  def find_by_name(
1390
1419
  ctx: typer.Context,
1391
- name: str = typer.Argument(..., help="Exact name to search for"),
1420
+ name: str = typer.Argument(..., help="Name to search for"),
1392
1421
  type: Optional[str] = typer.Option(None, "--type", "-t", help="Filter by type (function, class, file, module)"),
1422
+ fuzzy: Optional[bool] = typer.Option(None, "--fuzzy/--no-fuzzy", help="Enable/disable fuzzy matching for this command. Overrides the FUZZY_SEARCH config value (default: true)."),
1393
1423
  visual: bool = typer.Option(False, "--visual", "--viz", "-V", help="Show results as interactive graph visualization"),
1394
1424
  context: Optional[str] = typer.Option(None, "--context", "-c", help="Specific context to use"),
1395
1425
  ):
1396
1426
  """
1397
- Find code elements by exact name.
1398
-
1427
+ Find code elements by name.
1428
+
1429
+ Fuzzy matching is enabled by default (configurable via the FUZZY_SEARCH
1430
+ config key, or per-invocation with --fuzzy / --no-fuzzy).
1431
+
1399
1432
  Examples:
1400
1433
  cgc find name MyClass
1401
1434
  cgc find name calculate --type function
1435
+ cgc find name MyClass --no-fuzzy
1402
1436
  cgc find name MyClass --visual
1403
1437
  """
1404
1438
  _load_credentials()
@@ -1406,14 +1440,22 @@ def find_by_name(
1406
1440
  if not all(services[:3]):
1407
1441
  return
1408
1442
  db_manager, graph_builder, code_finder = services[:3]
1409
-
1443
+
1444
+ # Resolve effective fuzzy setting: CLI flag wins, else config, else true.
1445
+ if fuzzy is None:
1446
+ from codegraphcontext.cli.config_manager import load_config
1447
+ cfg_value = load_config().get("FUZZY_SEARCH", "true")
1448
+ fuzzy_search = str(cfg_value).strip().lower() == "true"
1449
+ else:
1450
+ fuzzy_search = fuzzy
1451
+
1410
1452
  try:
1411
1453
  results = []
1412
-
1454
+
1413
1455
  # Search based on type filter
1414
1456
  if type is None or type.lower() == 'all':
1415
- funcs = code_finder.find_by_function_name(name, fuzzy_search=False)
1416
- classes = code_finder.find_by_class_name(name, fuzzy_search=False)
1457
+ funcs = code_finder.find_by_function_name(name, fuzzy_search=fuzzy_search)
1458
+ classes = code_finder.find_by_class_name(name, fuzzy_search=fuzzy_search)
1417
1459
  variables = code_finder.find_by_variable_name(name)
1418
1460
  modules = code_finder.find_by_module_name(name)
1419
1461
  imports = code_finder.find_imports(name)
@@ -1445,11 +1487,11 @@ def find_by_name(
1445
1487
  results.append(row)
1446
1488
 
1447
1489
  elif type.lower() == 'function':
1448
- results = code_finder.find_by_function_name(name, fuzzy_search=False)
1490
+ results = code_finder.find_by_function_name(name, fuzzy_search=fuzzy_search)
1449
1491
  for r in results: r['type'] = 'Function'
1450
-
1492
+
1451
1493
  elif type.lower() == 'class':
1452
- results = code_finder.find_by_class_name(name, fuzzy_search=False)
1494
+ results = code_finder.find_by_class_name(name, fuzzy_search=fuzzy_search)
1453
1495
  for r in results: r['type'] = 'Class'
1454
1496
 
1455
1497
  elif type.lower() == 'variable':
@@ -2226,10 +2268,10 @@ def analyze_inheritance_tree(
2226
2268
 
2227
2269
  @analyze_app.command("complexity")
2228
2270
  def analyze_complexity(
2229
- path: Optional[str] = typer.Argument(None, help="Specific function name to analyze"),
2271
+ path: Optional[str] = typer.Argument(None, help="Function name or file path to analyze"),
2230
2272
  threshold: int = typer.Option(10, "--threshold", "-t", help="Complexity threshold for warnings"),
2231
2273
  limit: int = typer.Option(20, "--limit", "-l", help="Maximum results to show"),
2232
- file: Optional[str] = typer.Option(None, "--file", "-f", help="Specific file path (only used when function name is provided)"),
2274
+ file: Optional[str] = typer.Option(None, "--file", "-f", help="Specific file path to scope analysis"),
2233
2275
  context: Optional[str] = typer.Option(None, "--context", "-c", help="Specific context to use"),
2234
2276
  ):
2235
2277
  """
@@ -2240,16 +2282,55 @@ def analyze_complexity(
2240
2282
  cgc analyze complexity --threshold 15 # Functions over threshold
2241
2283
  cgc analyze complexity my_function # Specific function
2242
2284
  cgc analyze complexity my_function -f file.py # Specific function in file
2285
+ cgc analyze complexity src/main.py # Most complex functions in file
2286
+ cgc analyze complexity main.py # Most complex functions in file
2287
+ cgc analyze complexity --file src/main.py # Alternative file syntax
2243
2288
  """
2244
2289
  _load_credentials()
2245
2290
  services = _initialize_services(context)
2246
2291
  if not all(services[:3]):
2247
2292
  return
2248
2293
  db_manager, graph_builder, code_finder = services[:3]
2249
-
2294
+
2295
+ _FILE_EXTENSIONS = ('.py', '.js', '.ts', '.jsx', '.tsx', '.go', '.rs', '.rb',
2296
+ '.java', '.cpp', '.c', '.cs', '.swift', '.kt', '.scala',
2297
+ '.php', '.lua', '.zig', '.ex', '.exs', '.r', '.m', '.sh')
2298
+
2299
+ def _is_file_path(value: str) -> bool:
2300
+ if '/' in value or '\\' in value:
2301
+ return True
2302
+ return any(value.endswith(ext) for ext in _FILE_EXTENSIONS)
2303
+
2304
+ def _render_complexity_table(results, title):
2305
+ if not results:
2306
+ console.print("[yellow]No complexity data available for this file[/yellow]")
2307
+ return
2308
+ table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
2309
+ table.add_column("Function", style="cyan")
2310
+ table.add_column("Complexity", style="yellow", justify="right")
2311
+ table.add_column("Location", style="dim", overflow="fold")
2312
+ for func in results:
2313
+ complexity = func.get('complexity', 0)
2314
+ color = "red" if complexity > threshold else "yellow" if complexity > threshold/2 else "green"
2315
+ fpath = func.get('path', '')
2316
+ line_str = str(func.get('line_number', ''))
2317
+ location_str = f"{fpath}:{line_str}" if line_str else fpath
2318
+ table.add_row(
2319
+ func.get('function_name', ''),
2320
+ f"[{color}]{complexity}[/{color}]",
2321
+ location_str
2322
+ )
2323
+ console.print(f"\n[bold cyan]{title}[/bold cyan]")
2324
+ console.print(table)
2325
+ console.print(f"\n[dim]{len([f for f in results if f.get('complexity', 0) > threshold])} function(s) exceed threshold[/dim]")
2326
+
2250
2327
  try:
2251
- if path:
2252
- # Specific function
2328
+ if path and _is_file_path(path):
2329
+ # File path provided as positional argument
2330
+ results = code_finder.find_most_complex_functions_in_file(path, limit)
2331
+ _render_complexity_table(results, f"Most Complex Functions in '{path}' (threshold: {threshold}):")
2332
+ elif path:
2333
+ # Specific function name
2253
2334
  result = code_finder.get_cyclomatic_complexity(path, file)
2254
2335
  if result:
2255
2336
  console.print(f"\n[bold cyan]Complexity for '{path}':[/bold cyan]")
@@ -2258,35 +2339,14 @@ def analyze_complexity(
2258
2339
  console.print(f" Line: [dim]{result.get('line_number', '')}[/dim]")
2259
2340
  else:
2260
2341
  console.print(f"[yellow]Function '{path}' not found or has no complexity data[/yellow]")
2342
+ elif file:
2343
+ # --file option without positional arg
2344
+ results = code_finder.find_most_complex_functions_in_file(file, limit)
2345
+ _render_complexity_table(results, f"Most Complex Functions in '{file}' (threshold: {threshold}):")
2261
2346
  else:
2262
- # Most complex functions
2347
+ # Global - most complex functions
2263
2348
  results = code_finder.find_most_complex_functions(limit)
2264
-
2265
- if not results:
2266
- console.print("[yellow]No complexity data available[/yellow]")
2267
- return
2268
-
2269
- table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
2270
- table.add_column("Function", style="cyan")
2271
- table.add_column("Complexity", style="yellow", justify="right")
2272
- table.add_column("Location", style="dim", overflow="fold")
2273
-
2274
- for func in results:
2275
- complexity = func.get('complexity', 0)
2276
- color = "red" if complexity > threshold else "yellow" if complexity > threshold/2 else "green"
2277
- path = func.get('path', '')
2278
- line_str = str(func.get('line_number', ''))
2279
- location_str = f"{path}:{line_str}" if line_str else path
2280
-
2281
- table.add_row(
2282
- func.get('function_name', ''),
2283
- f"[{color}]{complexity}[/{color}]",
2284
- location_str
2285
- )
2286
-
2287
- console.print(f"\n[bold cyan]Most Complex Functions (threshold: {threshold}):[/bold cyan]")
2288
- console.print(table)
2289
- console.print(f"\n[dim]{len([f for f in results if f.get('complexity', 0) > threshold])} function(s) exceed threshold[/dim]")
2349
+ _render_complexity_table(results, f"Most Complex Functions (threshold: {threshold}):")
2290
2350
  finally:
2291
2351
  db_manager.close_driver()
2292
2352
 
@@ -238,11 +238,14 @@ def download_bundle(name: str, output_dir: Optional[str] = None, auto_load: bool
238
238
 
239
239
  # Determine output path
240
240
  bundle_filename = bundle.get('bundle_name', f"{name}.cgc")
241
+ is_base64 = download_url.endswith('.base64') or bundle_filename.endswith('.base64')
242
+ clean_filename = bundle_filename.replace('.base64', '')
243
+
241
244
  if output_dir:
242
- output_path = Path(output_dir) / bundle_filename
245
+ output_path = Path(output_dir) / clean_filename
243
246
  output_path.parent.mkdir(parents=True, exist_ok=True)
244
247
  else:
245
- output_path = Path.cwd() / bundle_filename
248
+ output_path = Path.cwd() / clean_filename
246
249
 
247
250
  # Check if already exists
248
251
  if output_path.exists():
@@ -257,7 +260,7 @@ def download_bundle(name: str, output_dir: Optional[str] = None, auto_load: bool
257
260
 
258
261
  # Download with progress bar
259
262
  try:
260
- console.print(f"[cyan]Downloading {bundle_filename}...[/cyan]")
263
+ console.print(f"[cyan]Downloading {clean_filename}...[/cyan]")
261
264
  console.print(f"[dim]From: {download_url}[/dim]")
262
265
 
263
266
  response = requests.get(download_url, stream=True, timeout=30)
@@ -272,11 +275,20 @@ def download_bundle(name: str, output_dir: Optional[str] = None, auto_load: bool
272
275
  ) as progress:
273
276
  task = progress.add_task(f"Downloading {bundle.get('size', 'unknown')}...", total=total_size)
274
277
 
275
- with open(output_path, 'wb') as f:
276
- for chunk in response.iter_content(chunk_size=8192):
277
- if chunk:
278
- f.write(chunk)
279
- progress.update(task, advance=len(chunk))
278
+ if is_base64:
279
+ # Read entire base64 response, decode it, and write it
280
+ base64_content = response.content.strip()
281
+ import base64
282
+ decoded_content = base64.b64decode(base64_content)
283
+ with open(output_path, 'wb') as f:
284
+ f.write(decoded_content)
285
+ progress.update(task, completed=total_size)
286
+ else:
287
+ with open(output_path, 'wb') as f:
288
+ for chunk in response.iter_content(chunk_size=8192):
289
+ if chunk:
290
+ f.write(chunk)
291
+ progress.update(task, advance=len(chunk))
280
292
 
281
293
  console.print(f"[bold green]✓ Downloaded successfully: {output_path}[/bold green]")
282
294
 
@@ -26,6 +26,13 @@ def _is_kuzudb_available() -> bool:
26
26
  except ImportError:
27
27
  return False
28
28
 
29
+ def _is_ladybugdb_available() -> bool:
30
+ """Check if LadybugDB is installed."""
31
+ try:
32
+ return importlib.util.find_spec("ladybug") is not None
33
+ except ImportError:
34
+ return False
35
+
29
36
  def _is_falkordb_available() -> bool:
30
37
  """Check if FalkorDB Lite is installed (Unix only)."""
31
38
  if platform.system() == "Windows":
@@ -127,8 +134,8 @@ def get_database_manager(db_path: Optional[str] = None) -> Union['DatabaseManage
127
134
  info_logger("Using Nornic DB (explicit)")
128
135
  return NornicDBManager()
129
136
  elif db_type == 'ladybugdb':
130
- if not _is_kuzudb_available():
131
- raise ValueError("Database set to 'ladybugdb' but LadybugDB core (kuzu) is not installed.\nRun 'pip install kuzu'")
137
+ if not _is_ladybugdb_available():
138
+ raise ValueError("Database set to 'ladybugdb' but LadybugDB is not installed.\nRun 'pip install ladybug'")
132
139
  from .database_ladybug import LadybugDBManager
133
140
  info_logger(f"Using LadybugDB (explicit) at {db_path or 'default path'}")
134
141
  return LadybugDBManager(db_path=db_path)
@@ -16,10 +16,12 @@ def _github_headers() -> dict:
16
16
  headers["Authorization"] = f"token {token}"
17
17
  return headers
18
18
 
19
- GITHUB_ORG = "CodeGraphContext"
20
- GITHUB_REPO = "CodeGraphContext"
21
- REGISTRY_API_URL = f"https://api.github.com/repos/{GITHUB_ORG}/{GITHUB_REPO}/releases"
22
- MANIFEST_URL = f"https://github.com/{GITHUB_ORG}/{GITHUB_REPO}/releases/download/on-demand-bundles/manifest.json"
19
+ def _get_manifest_url() -> str:
20
+ import os
21
+ hf_repo = os.environ.get("HF_REGISTRY_REPO") or "codegraphcontext/bundles"
22
+ return f"https://huggingface.co/datasets/{hf_repo}/raw/main/manifest.json"
23
+
24
+ REGISTRY_API_URL = "https://api.github.com/repos/CodeGraphContext/CodeGraphContext/releases"
23
25
 
24
26
  class BundleRegistry:
25
27
  """
@@ -38,7 +40,7 @@ class BundleRegistry:
38
40
 
39
41
  # 1. Fetch on-demand bundles from manifest
40
42
  try:
41
- response = requests.get(MANIFEST_URL, headers=_github_headers(), timeout=10)
43
+ response = requests.get(_get_manifest_url(), headers=_github_headers(), timeout=10)
42
44
  if response.status_code == 200:
43
45
  manifest = response.json()
44
46
  if manifest.get('bundles'):
@@ -28,7 +28,7 @@ from datetime import datetime, date
28
28
  import subprocess
29
29
 
30
30
  from codegraphcontext.utils.debug_log import debug_log, info_logger, error_logger, warning_logger
31
- from codegraphcontext.utils.git_utils import get_repo_commit_hash
31
+ from codegraphcontext.utils.git_utils import get_repo_commit_hash, get_repo_branch_name
32
32
 
33
33
 
34
34
  class _BundleEncoder(json.JSONEncoder):
@@ -106,11 +106,9 @@ class CGCBundle:
106
106
  with tempfile.TemporaryDirectory() as temp_dir:
107
107
  temp_path = Path(temp_dir)
108
108
 
109
- # Step 1: Extract metadata
109
+ # Step 1: Extract metadata base
110
110
  info_logger("Extracting metadata...")
111
111
  metadata = self._extract_metadata(repo_path)
112
- with open(temp_path / "metadata.json", 'w') as f:
113
- json.dump(metadata, f, indent=2, cls=_BundleEncoder)
114
112
 
115
113
  # Step 2: Extract schema
116
114
  info_logger("Extracting schema...")
@@ -126,12 +124,48 @@ class CGCBundle:
126
124
  info_logger("Extracting edges...")
127
125
  edge_count = self._extract_edges(temp_path / "edges.jsonl", repo_path)
128
126
 
129
- # Step 5: Generate statistics
127
+ # Step 5: Generate statistics and assemble standardized metadata
130
128
  if include_stats:
131
129
  info_logger("Generating statistics...")
132
130
  stats = self._generate_stats(repo_path, node_count, edge_count)
133
131
  with open(temp_path / "stats.json", 'w') as f:
134
132
  json.dump(stats, f, indent=2, cls=_BundleEncoder)
133
+ else:
134
+ stats = None
135
+
136
+ # Compile dynamic standardized metadata
137
+ try:
138
+ from importlib.metadata import version as get_version
139
+ py_version = get_version("codegraphcontext")
140
+ except Exception:
141
+ py_version = "0.4.12"
142
+
143
+ metadata["format_version"] = "1.0.0"
144
+ metadata["generator"] = f"PYv{py_version}"
145
+
146
+ # Timestamp format: YYYY-MM-DDTHH:MM:SSZ (UTC ISO String format)
147
+ # datetime.utcnow() was deprecated, using timezone-aware or simple UTC strftime
148
+ from datetime import timezone
149
+ metadata["exported_at"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
150
+
151
+ # Build name
152
+ if metadata.get("repo") and "/" in metadata["repo"]:
153
+ owner, repo_name = metadata["repo"].split("/", 1)
154
+ branch = metadata.get("branch", "main")
155
+ commit = metadata.get("commit", "latest")
156
+ metadata["name"] = f"{owner}__{repo_name}__{branch}__{commit}.cgc"
157
+ else:
158
+ foldername = metadata.get("repo", "unknown")
159
+ metadata["name"] = f"{foldername}.cgc"
160
+
161
+ metadata["graph_metrics"] = {
162
+ "total_nodes": node_count,
163
+ "total_edges": edge_count
164
+ }
165
+
166
+ # Save final metadata.json
167
+ with open(temp_path / "metadata.json", 'w') as f:
168
+ json.dump(metadata, f, indent=2, cls=_BundleEncoder)
135
169
 
136
170
  # Step 6: Create README
137
171
  self._create_readme(temp_path / "README.md", metadata, stats if include_stats else None)
@@ -280,8 +314,15 @@ class CGCBundle:
280
314
  if hasattr(node, attr):
281
315
  repo[attr] = getattr(node, attr)
282
316
 
283
- metadata["repo"] = repo.get('name', str(repo_path))
284
- metadata["repo_path"] = repo.get('path')
317
+ metadata["repo"] = repo.get('name', str(repo_path.name if repo_path else 'unknown'))
318
+ # Clean up absolute path prefix to keep it relative
319
+ meta_path = repo.get('path', '')
320
+ if repo_path and meta_path.startswith(str(repo_path.resolve())):
321
+ repo_str = str(repo_path.resolve())
322
+ rel = meta_path[len(repo_str):].lstrip('/')
323
+ metadata["repo_path"] = "./" + rel if rel else "."
324
+ else:
325
+ metadata["repo_path"] = meta_path
285
326
  metadata["is_dependency"] = repo.get('is_dependency', False)
286
327
  else:
287
328
  # All repositories
@@ -297,6 +338,9 @@ class CGCBundle:
297
338
  commit = get_repo_commit_hash(repo_path)
298
339
  if commit:
299
340
  metadata["commit"] = commit[:8]
341
+ branch = get_repo_branch_name(repo_path)
342
+ if branch:
343
+ metadata["branch"] = branch
300
344
 
301
345
  try:
302
346
  result = session.run("""
@@ -410,6 +454,14 @@ class CGCBundle:
410
454
  elif hasattr(node, 'properties'):
411
455
  node_dict = dict(node.properties)
412
456
 
457
+ # Clean up absolute path prefix to keep it relative
458
+ if repo_path:
459
+ repo_str = str(repo_path.resolve())
460
+ for key, val in list(node_dict.items()):
461
+ if isinstance(val, str) and val.startswith(repo_str):
462
+ rel = val[len(repo_str):].lstrip('/')
463
+ node_dict[key] = "./" + rel if rel else "."
464
+
413
465
  node_dict['_labels'] = labels
414
466
 
415
467
  # Store internal ID for reference
@@ -480,6 +532,14 @@ class CGCBundle:
480
532
  elif hasattr(rel, 'properties'):
481
533
  rel_props = dict(rel.properties)
482
534
 
535
+ # Clean up absolute path prefix inside edge properties
536
+ if repo_path:
537
+ repo_str = str(repo_path.resolve())
538
+ for key, val in list(rel_props.items()):
539
+ if isinstance(val, str) and val.startswith(repo_str):
540
+ rel = val[len(repo_str):].lstrip('/')
541
+ rel_props[key] = "./" + rel if rel else "."
542
+
483
543
  # Create edge representation
484
544
  edge_dict = {
485
545
  'from': from_id,