codegraph-cli 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_cli/__init__.py +1 -1
- codegraph_cli/agents.py +1 -1
- codegraph_cli/cli.py +6 -0
- codegraph_cli/cli_chat.py +2 -2
- codegraph_cli/cli_setup.py +158 -0
- codegraph_cli/config.py +6 -1
- codegraph_cli/config_manager.py +70 -20
- codegraph_cli/context_manager.py +1 -1
- codegraph_cli/embeddings.py +268 -100
- codegraph_cli/orchestrator.py +2 -2
- codegraph_cli/rag.py +3 -3
- {codegraph_cli-2.0.0.dist-info → codegraph_cli-2.1.0.dist-info}/METADATA +7 -4
- {codegraph_cli-2.0.0.dist-info → codegraph_cli-2.1.0.dist-info}/RECORD +17 -17
- {codegraph_cli-2.0.0.dist-info → codegraph_cli-2.1.0.dist-info}/WHEEL +0 -0
- {codegraph_cli-2.0.0.dist-info → codegraph_cli-2.1.0.dist-info}/entry_points.txt +0 -0
- {codegraph_cli-2.0.0.dist-info → codegraph_cli-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {codegraph_cli-2.0.0.dist-info → codegraph_cli-2.1.0.dist-info}/top_level.txt +0 -0
codegraph_cli/__init__.py
CHANGED
codegraph_cli/agents.py
CHANGED
|
@@ -6,7 +6,7 @@ from collections import deque
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Dict, List, Set
|
|
8
8
|
|
|
9
|
-
from .embeddings import HashEmbeddingModel
|
|
9
|
+
from .embeddings import HashEmbeddingModel, TransformerEmbedder
|
|
10
10
|
from .llm import LocalLLM
|
|
11
11
|
from .models import ImpactReport
|
|
12
12
|
from .parser import PythonGraphParser
|
codegraph_cli/cli.py
CHANGED
|
@@ -10,6 +10,7 @@ import typer
|
|
|
10
10
|
from . import __version__, config
|
|
11
11
|
from .cli_chat import chat_app
|
|
12
12
|
from .cli_setup import setup as setup_wizard, set_llm, unset_llm, show_llm
|
|
13
|
+
from .cli_setup import set_embedding, unset_embedding, show_embedding
|
|
13
14
|
from .cli_v2 import v2_app
|
|
14
15
|
from .graph_export import export_dot, export_html
|
|
15
16
|
from .orchestrator import MCPOrchestrator
|
|
@@ -35,6 +36,11 @@ app.command("set-llm")(set_llm)
|
|
|
35
36
|
app.command("unset-llm")(unset_llm)
|
|
36
37
|
app.command("show-llm")(show_llm)
|
|
37
38
|
|
|
39
|
+
# Register embedding management commands
|
|
40
|
+
app.command("set-embedding")(set_embedding)
|
|
41
|
+
app.command("unset-embedding")(unset_embedding)
|
|
42
|
+
app.command("show-embedding")(show_embedding)
|
|
43
|
+
|
|
38
44
|
|
|
39
45
|
def version_callback(value: bool):
|
|
40
46
|
"""Print version and exit."""
|
codegraph_cli/cli_chat.py
CHANGED
|
@@ -281,7 +281,7 @@ def start_chat(
|
|
|
281
281
|
new_session: bool = typer.Option(False, "--new", "-n", help="Force start a new session"),
|
|
282
282
|
):
|
|
283
283
|
"""Start interactive chat session."""
|
|
284
|
-
from .embeddings import
|
|
284
|
+
from .embeddings import get_embedder
|
|
285
285
|
from .project_context import ProjectContext
|
|
286
286
|
|
|
287
287
|
pm = ProjectManager()
|
|
@@ -294,7 +294,7 @@ def start_chat(
|
|
|
294
294
|
|
|
295
295
|
# Initialize components
|
|
296
296
|
context = ProjectContext(project, pm)
|
|
297
|
-
embedding_model =
|
|
297
|
+
embedding_model = get_embedder()
|
|
298
298
|
llm = LocalLLM(model=llm_model, provider=llm_provider, api_key=llm_api_key, endpoint=llm_endpoint)
|
|
299
299
|
rag_retriever = RAGRetriever(context.store, embedding_model)
|
|
300
300
|
|
codegraph_cli/cli_setup.py
CHANGED
|
@@ -8,6 +8,7 @@ from typing import Optional
|
|
|
8
8
|
import typer
|
|
9
9
|
|
|
10
10
|
from . import config_manager
|
|
11
|
+
from .embeddings import EMBEDDING_MODELS
|
|
11
12
|
|
|
12
13
|
app = typer.Typer(help="Setup wizard for LLM provider configuration")
|
|
13
14
|
|
|
@@ -287,6 +288,12 @@ def setup():
|
|
|
287
288
|
print_error("Failed to save configuration!")
|
|
288
289
|
raise typer.Exit(code=1)
|
|
289
290
|
|
|
291
|
+
# Offer embedding setup
|
|
292
|
+
typer.echo("")
|
|
293
|
+
setup_emb = typer.confirm("Configure embedding model for semantic search?", default=True)
|
|
294
|
+
if setup_emb:
|
|
295
|
+
_interactive_embedding_setup()
|
|
296
|
+
|
|
290
297
|
|
|
291
298
|
def set_llm(
|
|
292
299
|
provider: str = typer.Argument(..., help="LLM provider: ollama, groq, openai, anthropic, gemini, openrouter"),
|
|
@@ -466,5 +473,156 @@ def show_llm():
|
|
|
466
473
|
typer.echo("")
|
|
467
474
|
|
|
468
475
|
|
|
476
|
+
# ===================================================================
|
|
477
|
+
# Embedding model commands
|
|
478
|
+
# ===================================================================
|
|
479
|
+
|
|
480
|
+
def _interactive_embedding_setup():
|
|
481
|
+
"""Interactive embedding model picker (called from setup wizard)."""
|
|
482
|
+
typer.echo("")
|
|
483
|
+
typer.echo(typer.style("╭──────────────────────────────────────────────╮", fg=typer.colors.CYAN))
|
|
484
|
+
typer.echo(typer.style("│", fg=typer.colors.CYAN) + typer.style(" Embedding Model Setup ", bold=True) + typer.style("│", fg=typer.colors.CYAN))
|
|
485
|
+
typer.echo(typer.style("╰──────────────────────────────────────────────╯", fg=typer.colors.CYAN))
|
|
486
|
+
typer.echo("")
|
|
487
|
+
typer.echo("Choose an embedding model for semantic code search:")
|
|
488
|
+
typer.echo("Larger models give better results but need more disk/RAM.\n")
|
|
489
|
+
|
|
490
|
+
# List models with numbers
|
|
491
|
+
model_keys = list(EMBEDDING_MODELS.keys())
|
|
492
|
+
for i, key in enumerate(model_keys, 1):
|
|
493
|
+
spec = EMBEDDING_MODELS[key]
|
|
494
|
+
name_col = f"{key}".ljust(12)
|
|
495
|
+
size_col = f"({spec['size']})".ljust(14)
|
|
496
|
+
desc = spec["description"]
|
|
497
|
+
typer.echo(f" {i}) {name_col} {size_col} {desc}")
|
|
498
|
+
|
|
499
|
+
typer.echo("")
|
|
500
|
+
|
|
501
|
+
while True:
|
|
502
|
+
choice = typer.prompt(f"Enter choice [1-{len(model_keys)}]", type=str)
|
|
503
|
+
try:
|
|
504
|
+
idx = int(choice)
|
|
505
|
+
if 1 <= idx <= len(model_keys):
|
|
506
|
+
selected = model_keys[idx - 1]
|
|
507
|
+
break
|
|
508
|
+
except ValueError:
|
|
509
|
+
# Accept model key directly
|
|
510
|
+
if choice.strip() in model_keys:
|
|
511
|
+
selected = choice.strip()
|
|
512
|
+
break
|
|
513
|
+
print_error(f"Invalid choice. Enter 1-{len(model_keys)} or a model key.")
|
|
514
|
+
|
|
515
|
+
spec = EMBEDDING_MODELS[selected]
|
|
516
|
+
|
|
517
|
+
if selected != "hash":
|
|
518
|
+
typer.echo(f"\n Model: {typer.style(spec['name'], fg=typer.colors.CYAN)}")
|
|
519
|
+
typer.echo(f" Download: {typer.style(spec['size'], fg=typer.colors.YELLOW)}")
|
|
520
|
+
typer.echo(f" Dim: {spec['dim']}")
|
|
521
|
+
print_info("Requires: pip install codegraph-cli[embeddings]")
|
|
522
|
+
else:
|
|
523
|
+
typer.echo(f"\n Model: {typer.style('Hash Embedding (zero-dependency)', fg=typer.colors.CYAN)}")
|
|
524
|
+
print_info("No download needed, but no semantic understanding.")
|
|
525
|
+
|
|
526
|
+
success = config_manager.save_embedding_config(selected)
|
|
527
|
+
if success:
|
|
528
|
+
print_success(f"Embedding model set to: {selected}")
|
|
529
|
+
if selected != "hash":
|
|
530
|
+
print_info(f"Model will be downloaded on first use (~{spec['size']}).")
|
|
531
|
+
print_info("Re-index your project after changing embeddings: cg index <path>")
|
|
532
|
+
else:
|
|
533
|
+
print_error("Failed to save embedding config!")
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def set_embedding(
|
|
537
|
+
model: str = typer.Argument(
|
|
538
|
+
...,
|
|
539
|
+
help="Embedding model key: qodo-1.5b, jina-code, bge-base, minilm, hash",
|
|
540
|
+
),
|
|
541
|
+
):
|
|
542
|
+
"""Set the embedding model for semantic code search.
|
|
543
|
+
|
|
544
|
+
Available models (smallest to largest):
|
|
545
|
+
|
|
546
|
+
hash 0 bytes No download, keyword-level only
|
|
547
|
+
minilm ~80 MB Tiny, fast, decent quality
|
|
548
|
+
bge-base ~440 MB Solid general-purpose
|
|
549
|
+
jina-code ~550 MB Code-aware, good quality
|
|
550
|
+
qodo-1.5b ~6.2 GB Best quality, code-optimized
|
|
551
|
+
|
|
552
|
+
Examples:
|
|
553
|
+
cg set-embedding minilm
|
|
554
|
+
cg set-embedding jina-code
|
|
555
|
+
cg set-embedding hash
|
|
556
|
+
"""
|
|
557
|
+
model = model.lower().strip()
|
|
558
|
+
|
|
559
|
+
if model not in EMBEDDING_MODELS:
|
|
560
|
+
print_error(
|
|
561
|
+
f"Unknown model '{model}'. "
|
|
562
|
+
f"Choose from: {', '.join(EMBEDDING_MODELS.keys())}"
|
|
563
|
+
)
|
|
564
|
+
raise typer.Exit(code=1)
|
|
565
|
+
|
|
566
|
+
spec = EMBEDDING_MODELS[model]
|
|
567
|
+
success = config_manager.save_embedding_config(model)
|
|
568
|
+
|
|
569
|
+
if success:
|
|
570
|
+
print_success(f"Embedding model set to: {model}")
|
|
571
|
+
typer.echo(f" Name: {typer.style(spec['name'], fg=typer.colors.CYAN)}")
|
|
572
|
+
typer.echo(f" Dim: {spec['dim']}")
|
|
573
|
+
if model != "hash":
|
|
574
|
+
typer.echo(f" Size: {spec['size']} (downloaded on first use)")
|
|
575
|
+
print_info("Re-index your project after changing: cg index <path>")
|
|
576
|
+
else:
|
|
577
|
+
print_error("Failed to save configuration!")
|
|
578
|
+
raise typer.Exit(code=1)
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def unset_embedding():
|
|
582
|
+
"""Reset embedding model to default (hash — no download)."""
|
|
583
|
+
success = config_manager.clear_embedding_config()
|
|
584
|
+
if success:
|
|
585
|
+
print_success("Embedding model reset to default (hash).")
|
|
586
|
+
print_info("No neural model will be used. Re-index to apply.")
|
|
587
|
+
else:
|
|
588
|
+
print_error("Failed to reset embedding config!")
|
|
589
|
+
raise typer.Exit(code=1)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def show_embedding():
|
|
593
|
+
"""Show current embedding model configuration."""
|
|
594
|
+
typer.echo("")
|
|
595
|
+
typer.echo(typer.style("╭──────────────────────────────────────────────╮", fg=typer.colors.CYAN))
|
|
596
|
+
typer.echo(typer.style("│", fg=typer.colors.CYAN) + typer.style(" Embedding Configuration ", bold=True) + typer.style("│", fg=typer.colors.CYAN))
|
|
597
|
+
typer.echo(typer.style("╰──────────────────────────────────────────────╯", fg=typer.colors.CYAN))
|
|
598
|
+
|
|
599
|
+
emb_cfg = config_manager.load_embedding_config()
|
|
600
|
+
current_key = emb_cfg.get("model", "hash")
|
|
601
|
+
spec = EMBEDDING_MODELS.get(current_key)
|
|
602
|
+
|
|
603
|
+
if spec is None:
|
|
604
|
+
typer.echo(f" Model {typer.style(current_key, fg=typer.colors.RED)} (unknown)")
|
|
605
|
+
else:
|
|
606
|
+
typer.echo(f" Model {typer.style(f' {current_key} ', bg=typer.colors.CYAN, fg=typer.colors.WHITE, bold=True)}")
|
|
607
|
+
typer.echo(f" Name {typer.style(spec['name'], bold=True)}")
|
|
608
|
+
typer.echo(f" Dim {spec['dim']}")
|
|
609
|
+
typer.echo(f" Size {spec['size']}")
|
|
610
|
+
typer.echo(f" Desc {spec['description']}")
|
|
611
|
+
|
|
612
|
+
typer.echo("")
|
|
613
|
+
typer.echo(typer.style(" Available Models", bold=True))
|
|
614
|
+
typer.echo(typer.style(" ─────────────────────────────────────────", dim=True))
|
|
615
|
+
for key, s in EMBEDDING_MODELS.items():
|
|
616
|
+
marker = typer.style(" *", fg=typer.colors.GREEN) if key == current_key else " "
|
|
617
|
+
typer.echo(f" {marker} {key.ljust(12)} {s['size'].ljust(12)} {s['description']}")
|
|
618
|
+
|
|
619
|
+
typer.echo("")
|
|
620
|
+
typer.echo(typer.style(" Quick Commands", bold=True))
|
|
621
|
+
typer.echo(typer.style(" ─────────────────────────────────────────", dim=True))
|
|
622
|
+
typer.echo(f" {typer.style('cg set-embedding <model>', fg=typer.colors.YELLOW)} Switch model")
|
|
623
|
+
typer.echo(f" {typer.style('cg unset-embedding', fg=typer.colors.YELLOW)} Reset to hash")
|
|
624
|
+
typer.echo("")
|
|
625
|
+
|
|
626
|
+
|
|
469
627
|
if __name__ == "__main__":
|
|
470
628
|
app()
|
codegraph_cli/config.py
CHANGED
|
@@ -13,10 +13,12 @@ SUPPORTED_EXTENSIONS = {".py"}
|
|
|
13
13
|
|
|
14
14
|
# Load configuration from TOML file (if available)
|
|
15
15
|
try:
|
|
16
|
-
from .config_manager import load_config
|
|
16
|
+
from .config_manager import load_config, load_embedding_config
|
|
17
17
|
_toml_config = load_config()
|
|
18
|
+
_emb_config = load_embedding_config()
|
|
18
19
|
except ImportError:
|
|
19
20
|
_toml_config = {}
|
|
21
|
+
_emb_config = {}
|
|
20
22
|
|
|
21
23
|
# LLM Provider Configuration — loaded from ~/.codegraph/config.toml (set via `cg setup` or `cg set-llm`)
|
|
22
24
|
LLM_PROVIDER = _toml_config.get("provider", "ollama")
|
|
@@ -24,6 +26,9 @@ LLM_API_KEY = _toml_config.get("api_key", "")
|
|
|
24
26
|
LLM_MODEL = _toml_config.get("model", "qwen2.5-coder:7b")
|
|
25
27
|
LLM_ENDPOINT = _toml_config.get("endpoint", "http://127.0.0.1:11434/api/generate")
|
|
26
28
|
|
|
29
|
+
# Embedding model — set via `cg set-embedding` (default: "hash" = no download)
|
|
30
|
+
EMBEDDING_MODEL = _emb_config.get("model", "hash")
|
|
31
|
+
|
|
27
32
|
|
|
28
33
|
def ensure_base_dirs() -> None:
|
|
29
34
|
"""Create base directories for local storage if needed."""
|
codegraph_cli/config_manager.py
CHANGED
|
@@ -78,11 +78,37 @@ def load_config() -> Dict[str, Any]:
|
|
|
78
78
|
return DEFAULT_CONFIGS["ollama"].copy()
|
|
79
79
|
|
|
80
80
|
|
|
81
|
+
def load_full_config() -> Dict[str, Any]:
|
|
82
|
+
"""Load the entire TOML config (all sections)."""
|
|
83
|
+
if not CONFIG_FILE.exists() or toml is None:
|
|
84
|
+
return {}
|
|
85
|
+
try:
|
|
86
|
+
with open(CONFIG_FILE, "r") as f:
|
|
87
|
+
return toml.load(f)
|
|
88
|
+
except Exception:
|
|
89
|
+
return {}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _save_full_config(config: Dict[str, Any]) -> bool:
|
|
93
|
+
"""Write entire config dict to TOML file, preserving all sections."""
|
|
94
|
+
if toml is None:
|
|
95
|
+
return False
|
|
96
|
+
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
|
97
|
+
try:
|
|
98
|
+
with open(CONFIG_FILE, "w") as f:
|
|
99
|
+
toml.dump(config, f)
|
|
100
|
+
return True
|
|
101
|
+
except Exception:
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
|
|
81
105
|
def save_config(provider: str, model: str, api_key: str = "", endpoint: str = "") -> bool:
|
|
82
106
|
"""Save LLM configuration to TOML file.
|
|
83
107
|
|
|
108
|
+
Preserves other sections (e.g. ``[embeddings]``) in the file.
|
|
109
|
+
|
|
84
110
|
Args:
|
|
85
|
-
provider: Provider name (ollama, groq, openai, anthropic)
|
|
111
|
+
provider: Provider name (ollama, groq, openai, anthropic, gemini, openrouter)
|
|
86
112
|
model: Model name
|
|
87
113
|
api_key: API key for cloud providers
|
|
88
114
|
endpoint: Custom endpoint (for Ollama)
|
|
@@ -90,32 +116,56 @@ def save_config(provider: str, model: str, api_key: str = "", endpoint: str = ""
|
|
|
90
116
|
Returns:
|
|
91
117
|
True if saved successfully, False otherwise
|
|
92
118
|
"""
|
|
93
|
-
|
|
94
|
-
return False
|
|
95
|
-
|
|
96
|
-
# Ensure directory exists
|
|
97
|
-
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
|
119
|
+
config = load_full_config()
|
|
98
120
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
"
|
|
102
|
-
"provider": provider,
|
|
103
|
-
"model": model,
|
|
104
|
-
}
|
|
121
|
+
config["llm"] = {
|
|
122
|
+
"provider": provider,
|
|
123
|
+
"model": model,
|
|
105
124
|
}
|
|
106
|
-
|
|
107
125
|
if api_key:
|
|
108
126
|
config["llm"]["api_key"] = api_key
|
|
109
|
-
|
|
110
127
|
if endpoint:
|
|
111
128
|
config["llm"]["endpoint"] = endpoint
|
|
112
129
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
130
|
+
return _save_full_config(config)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ------------------------------------------------------------------
|
|
134
|
+
# Embedding configuration
|
|
135
|
+
# ------------------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
def load_embedding_config() -> Dict[str, Any]:
|
|
138
|
+
"""Load embedding configuration from ``[embeddings]`` section.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
Dict with at least ``model`` key, or empty dict.
|
|
142
|
+
"""
|
|
143
|
+
full = load_full_config()
|
|
144
|
+
return full.get("embeddings", {})
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def save_embedding_config(model_key: str) -> bool:
|
|
148
|
+
"""Save embedding model choice to config TOML.
|
|
149
|
+
|
|
150
|
+
Preserves ``[llm]`` and other sections.
|
|
151
|
+
|
|
152
|
+
Args:
|
|
153
|
+
model_key: One of the keys from ``EMBEDDING_MODELS``
|
|
154
|
+
(e.g. ``"minilm"``, ``"jina-code"``, ``"hash"``).
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
True if saved successfully.
|
|
158
|
+
"""
|
|
159
|
+
config = load_full_config()
|
|
160
|
+
config["embeddings"] = {"model": model_key}
|
|
161
|
+
return _save_full_config(config)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def clear_embedding_config() -> bool:
|
|
165
|
+
"""Remove ``[embeddings]`` section from config, resetting to default."""
|
|
166
|
+
config = load_full_config()
|
|
167
|
+
config.pop("embeddings", None)
|
|
168
|
+
return _save_full_config(config)
|
|
119
169
|
|
|
120
170
|
|
|
121
171
|
def get_provider_config(provider: str) -> Dict[str, Any]:
|
codegraph_cli/context_manager.py
CHANGED
codegraph_cli/embeddings.py
CHANGED
|
@@ -1,23 +1,32 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
"""Configurable code embedding engine with multiple model support.
|
|
2
|
+
|
|
3
|
+
Supported models (configure via ``cg set-embedding``):
|
|
4
|
+
|
|
5
|
+
========== ====================================== ========= ====== ======================
|
|
6
|
+
Key HuggingFace Model Download Dim Notes
|
|
7
|
+
========== ====================================== ========= ====== ======================
|
|
8
|
+
qodo-1.5b Qodo/Qodo-Embed-1-1.5B ~6.2 GB 1536 Best quality, code-optimized
|
|
9
|
+
jina-code jinaai/jina-embeddings-v2-base-code ~550 MB 768 Good quality, code-aware
|
|
10
|
+
bge-base BAAI/bge-base-en-v1.5 ~440 MB 768 Solid general-purpose
|
|
11
|
+
minilm sentence-transformers/all-MiniLM-L6-v2 ~80 MB 384 Tiny and fast
|
|
12
|
+
hash (none) 0 B 256 No ML, keyword-level only
|
|
13
|
+
========== ====================================== ========= ====== ======================
|
|
14
|
+
|
|
15
|
+
Architecture:
|
|
16
|
+
- Models downloaded once from HuggingFace and cached in ``~/.codegraph/models``.
|
|
17
|
+
- All inference runs on-device (CPU or GPU). No data leaves the machine.
|
|
18
|
+
- Uses raw ``transformers`` library only — no sentence-transformers, no flash_attn.
|
|
19
|
+
- Falls back to hash embeddings when ``torch``/``transformers`` are not installed.
|
|
10
20
|
"""
|
|
11
21
|
|
|
12
22
|
from __future__ import annotations
|
|
13
23
|
|
|
14
24
|
import logging
|
|
15
25
|
import math
|
|
16
|
-
import os
|
|
17
26
|
import re
|
|
18
27
|
from hashlib import blake2b
|
|
19
28
|
from pathlib import Path
|
|
20
|
-
from typing import Iterable, List, Optional, Union
|
|
29
|
+
from typing import Any, Dict, Iterable, List, Optional, Union
|
|
21
30
|
|
|
22
31
|
from .config import BASE_DIR
|
|
23
32
|
|
|
@@ -26,44 +35,115 @@ logger = logging.getLogger(__name__)
|
|
|
26
35
|
# Default local model cache directory
|
|
27
36
|
MODEL_CACHE_DIR: Path = BASE_DIR / "models"
|
|
28
37
|
|
|
29
|
-
# Preferred models in priority order
|
|
30
|
-
PREFERRED_MODELS: List[str] = [
|
|
31
|
-
"all-MiniLM-L6-v2",
|
|
32
|
-
"nomic-ai/nomic-embed-text-v1.5",
|
|
33
|
-
]
|
|
34
|
-
|
|
35
38
|
_TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
|
|
36
39
|
|
|
37
40
|
|
|
38
41
|
# ===================================================================
|
|
39
|
-
#
|
|
42
|
+
# Model Registry
|
|
43
|
+
# ===================================================================
|
|
44
|
+
|
|
45
|
+
EMBEDDING_MODELS: Dict[str, Dict[str, Any]] = {
|
|
46
|
+
"qodo-1.5b": {
|
|
47
|
+
"name": "Qodo Embed 1.5B",
|
|
48
|
+
"hf_id": "Qodo/Qodo-Embed-1-1.5B",
|
|
49
|
+
"dim": 1536,
|
|
50
|
+
"max_tokens": 8192,
|
|
51
|
+
"size": "~6.2 GB",
|
|
52
|
+
"description": "Best quality, code-optimized (needs 8GB+ RAM)",
|
|
53
|
+
"pooling": "last_token",
|
|
54
|
+
"trust_remote_code": True,
|
|
55
|
+
},
|
|
56
|
+
"jina-code": {
|
|
57
|
+
"name": "Jina Embeddings v2 Code",
|
|
58
|
+
"hf_id": "jinaai/jina-embeddings-v2-base-code",
|
|
59
|
+
"dim": 768,
|
|
60
|
+
"max_tokens": 8192,
|
|
61
|
+
"size": "~550 MB",
|
|
62
|
+
"description": "Good quality, code-aware, lightweight",
|
|
63
|
+
"pooling": "mean",
|
|
64
|
+
"trust_remote_code": True,
|
|
65
|
+
},
|
|
66
|
+
"bge-base": {
|
|
67
|
+
"name": "BGE Base EN v1.5",
|
|
68
|
+
"hf_id": "BAAI/bge-base-en-v1.5",
|
|
69
|
+
"dim": 768,
|
|
70
|
+
"max_tokens": 512,
|
|
71
|
+
"size": "~440 MB",
|
|
72
|
+
"description": "Solid general-purpose, fast",
|
|
73
|
+
"pooling": "cls",
|
|
74
|
+
"trust_remote_code": False,
|
|
75
|
+
},
|
|
76
|
+
"minilm": {
|
|
77
|
+
"name": "MiniLM L6 v2",
|
|
78
|
+
"hf_id": "sentence-transformers/all-MiniLM-L6-v2",
|
|
79
|
+
"dim": 384,
|
|
80
|
+
"max_tokens": 256,
|
|
81
|
+
"size": "~80 MB",
|
|
82
|
+
"description": "Tiny and fast, decent quality",
|
|
83
|
+
"pooling": "mean",
|
|
84
|
+
"trust_remote_code": False,
|
|
85
|
+
},
|
|
86
|
+
"hash": {
|
|
87
|
+
"name": "Hash Embedding",
|
|
88
|
+
"hf_id": None,
|
|
89
|
+
"dim": 256,
|
|
90
|
+
"max_tokens": None,
|
|
91
|
+
"size": "0 bytes",
|
|
92
|
+
"description": "Zero-dependency fallback, no semantics",
|
|
93
|
+
"pooling": None,
|
|
94
|
+
"trust_remote_code": False,
|
|
95
|
+
},
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
DEFAULT_MODEL = "hash"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ===================================================================
|
|
102
|
+
# TransformerEmbedder (handles all HuggingFace models)
|
|
40
103
|
# ===================================================================
|
|
41
104
|
|
|
42
|
-
class
|
|
43
|
-
"""
|
|
105
|
+
class TransformerEmbedder:
|
|
106
|
+
"""Generic HuggingFace embedding engine with configurable pooling.
|
|
44
107
|
|
|
45
|
-
|
|
46
|
-
``~/.codegraph/models`` so that subsequent runs are fully offline.
|
|
47
|
-
All computation is local – **no data leaves the machine**.
|
|
108
|
+
Supports multiple pooling strategies:
|
|
48
109
|
|
|
49
|
-
|
|
110
|
+
- **last_token** — last non-padding token (Qodo models).
|
|
111
|
+
- **mean** — mean over non-padding tokens (Jina, MiniLM).
|
|
112
|
+
- **cls** — ``[CLS]`` first token (BGE models).
|
|
50
113
|
|
|
51
|
-
|
|
52
|
-
|
|
114
|
+
Model weights are downloaded on first use and cached in
|
|
115
|
+
``~/.codegraph/models/`` for offline subsequent runs.
|
|
53
116
|
"""
|
|
54
117
|
|
|
55
118
|
def __init__(
|
|
56
119
|
self,
|
|
57
|
-
|
|
120
|
+
model_key: str,
|
|
58
121
|
cache_dir: Optional[Path] = None,
|
|
59
122
|
device: str = "cpu",
|
|
60
123
|
) -> None:
|
|
61
|
-
|
|
124
|
+
if model_key not in EMBEDDING_MODELS:
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"Unknown model: '{model_key}'. "
|
|
127
|
+
f"Available: {', '.join(EMBEDDING_MODELS.keys())}"
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
spec = EMBEDDING_MODELS[model_key]
|
|
131
|
+
if spec["hf_id"] is None:
|
|
132
|
+
raise ValueError(
|
|
133
|
+
f"'{model_key}' has no transformer backend. Use HashEmbeddingModel."
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
self.model_key = model_key
|
|
137
|
+
self.hf_id: str = spec["hf_id"]
|
|
138
|
+
self.dim: int = spec["dim"]
|
|
139
|
+
self.max_length: int = spec["max_tokens"]
|
|
140
|
+
self.pooling: str = spec["pooling"]
|
|
141
|
+
self.trust_remote_code: bool = spec["trust_remote_code"]
|
|
62
142
|
self.cache_dir = cache_dir or MODEL_CACHE_DIR
|
|
63
143
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
64
144
|
self.device = device
|
|
65
|
-
self._model:
|
|
66
|
-
self.
|
|
145
|
+
self._model: Any = None
|
|
146
|
+
self._tokenizer: Any = None
|
|
67
147
|
|
|
68
148
|
# ------------------------------------------------------------------
|
|
69
149
|
# Lazy model loading
|
|
@@ -74,100 +154,154 @@ class NeuralEmbedder:
|
|
|
74
154
|
return
|
|
75
155
|
|
|
76
156
|
try:
|
|
77
|
-
|
|
157
|
+
import torch # noqa: F401
|
|
158
|
+
from transformers import AutoModel, AutoTokenizer
|
|
78
159
|
except ImportError:
|
|
79
160
|
raise ImportError(
|
|
80
|
-
"
|
|
81
|
-
"Install with: pip install
|
|
161
|
+
"torch and transformers are required for neural embeddings.\n"
|
|
162
|
+
"Install with: pip install codegraph-cli[embeddings]\n"
|
|
163
|
+
"For CPU-only (skip NVIDIA packages):\n"
|
|
164
|
+
" pip install torch --index-url https://download.pytorch.org/whl/cpu\n"
|
|
165
|
+
" pip install transformers"
|
|
82
166
|
)
|
|
83
167
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
168
|
+
logger.info(
|
|
169
|
+
"Loading embedding model '%s' (%s) — first run downloads %s...",
|
|
170
|
+
self.model_key, self.hf_id, EMBEDDING_MODELS[self.model_key]["size"],
|
|
87
171
|
)
|
|
88
172
|
|
|
89
173
|
try:
|
|
90
|
-
self.
|
|
91
|
-
self.
|
|
92
|
-
|
|
93
|
-
|
|
174
|
+
self._tokenizer = AutoTokenizer.from_pretrained(
|
|
175
|
+
self.hf_id,
|
|
176
|
+
cache_dir=str(self.cache_dir),
|
|
177
|
+
trust_remote_code=self.trust_remote_code,
|
|
94
178
|
)
|
|
95
|
-
self.
|
|
179
|
+
self._model = AutoModel.from_pretrained(
|
|
180
|
+
self.hf_id,
|
|
181
|
+
cache_dir=str(self.cache_dir),
|
|
182
|
+
trust_remote_code=self.trust_remote_code,
|
|
183
|
+
)
|
|
184
|
+
self._model.eval()
|
|
185
|
+
self._model.to(self.device)
|
|
96
186
|
logger.info(
|
|
97
|
-
"Loaded
|
|
98
|
-
self.
|
|
187
|
+
"Loaded '%s' (dim=%d, pooling=%s) on %s",
|
|
188
|
+
self.model_key, self.dim, self.pooling, self.device,
|
|
99
189
|
)
|
|
100
190
|
except Exception as exc:
|
|
101
191
|
raise RuntimeError(
|
|
102
|
-
f"Failed to load embedding model '{self.
|
|
192
|
+
f"Failed to load embedding model '{self.model_key}' "
|
|
193
|
+
f"({self.hf_id}): {exc}"
|
|
103
194
|
) from exc
|
|
104
195
|
|
|
105
196
|
# ------------------------------------------------------------------
|
|
106
|
-
#
|
|
197
|
+
# Pooling strategies
|
|
107
198
|
# ------------------------------------------------------------------
|
|
108
199
|
|
|
109
|
-
@
|
|
110
|
-
def
|
|
111
|
-
"""
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
200
|
+
@staticmethod
|
|
201
|
+
def _pool_last_token(last_hidden_states: Any, attention_mask: Any) -> Any:
|
|
202
|
+
"""Last non-padding token (Qodo style)."""
|
|
203
|
+
import torch
|
|
204
|
+
|
|
205
|
+
left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
|
|
206
|
+
if left_padding:
|
|
207
|
+
return last_hidden_states[:, -1]
|
|
208
|
+
sequence_lengths = attention_mask.sum(dim=1) - 1
|
|
209
|
+
batch_size = last_hidden_states.shape[0]
|
|
210
|
+
return last_hidden_states[
|
|
211
|
+
torch.arange(batch_size, device=last_hidden_states.device),
|
|
212
|
+
sequence_lengths,
|
|
213
|
+
]
|
|
214
|
+
|
|
215
|
+
@staticmethod
|
|
216
|
+
def _pool_mean(last_hidden_states: Any, attention_mask: Any) -> Any:
|
|
217
|
+
"""Mean over non-padding tokens (Jina, MiniLM)."""
|
|
218
|
+
mask_expanded = attention_mask.unsqueeze(-1).expand(
|
|
219
|
+
last_hidden_states.size()
|
|
220
|
+
).float()
|
|
221
|
+
sum_embeddings = (last_hidden_states * mask_expanded).sum(dim=1)
|
|
222
|
+
sum_mask = mask_expanded.sum(dim=1).clamp(min=1e-9)
|
|
223
|
+
return sum_embeddings / sum_mask
|
|
224
|
+
|
|
225
|
+
@staticmethod
|
|
226
|
+
def _pool_cls(last_hidden_states: Any, attention_mask: Any) -> Any:
|
|
227
|
+
"""[CLS] first token (BGE)."""
|
|
228
|
+
return last_hidden_states[:, 0]
|
|
229
|
+
|
|
230
|
+
def _pool(self, last_hidden_states: Any, attention_mask: Any) -> Any:
|
|
231
|
+
"""Dispatch to the pooling strategy for this model."""
|
|
232
|
+
if self.pooling == "last_token":
|
|
233
|
+
return self._pool_last_token(last_hidden_states, attention_mask)
|
|
234
|
+
if self.pooling == "mean":
|
|
235
|
+
return self._pool_mean(last_hidden_states, attention_mask)
|
|
236
|
+
if self.pooling == "cls":
|
|
237
|
+
return self._pool_cls(last_hidden_states, attention_mask)
|
|
238
|
+
raise ValueError(f"Unknown pooling strategy: {self.pooling}")
|
|
239
|
+
|
|
240
|
+
# ------------------------------------------------------------------
|
|
241
|
+
# Encode
|
|
242
|
+
# ------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
def _encode(self, texts: List[str]) -> List[List[float]]:
|
|
245
|
+
"""Encode a batch of texts into L2-normalised embedding vectors."""
|
|
246
|
+
import torch
|
|
247
|
+
import torch.nn.functional as F
|
|
116
248
|
|
|
117
|
-
def embed_text(self, text: str) -> List[float]:
|
|
118
|
-
"""Embed a single text string and return a unit-norm vector."""
|
|
119
249
|
self._load_model()
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
250
|
+
|
|
251
|
+
batch_dict = self._tokenizer(
|
|
252
|
+
texts,
|
|
253
|
+
max_length=self.max_length,
|
|
254
|
+
padding=True,
|
|
255
|
+
truncation=True,
|
|
256
|
+
return_tensors="pt",
|
|
257
|
+
)
|
|
258
|
+
batch_dict = {k: v.to(self.device) for k, v in batch_dict.items()}
|
|
259
|
+
|
|
260
|
+
with torch.no_grad():
|
|
261
|
+
outputs = self._model(**batch_dict)
|
|
262
|
+
|
|
263
|
+
embeddings = self._pool(
|
|
264
|
+
outputs.last_hidden_state, batch_dict["attention_mask"],
|
|
126
265
|
)
|
|
127
|
-
|
|
266
|
+
embeddings = F.normalize(embeddings, p=2, dim=1)
|
|
267
|
+
return embeddings.cpu().tolist()
|
|
268
|
+
|
|
269
|
+
# ------------------------------------------------------------------
|
|
270
|
+
# Public API
|
|
271
|
+
# ------------------------------------------------------------------
|
|
272
|
+
|
|
273
|
+
def embed_text(self, text: str) -> List[float]:
|
|
274
|
+
"""Embed a single text string and return a unit-norm vector."""
|
|
275
|
+
return self._encode([text])[0]
|
|
128
276
|
|
|
129
277
|
def embed_documents(
|
|
130
278
|
self,
|
|
131
279
|
texts: List[str],
|
|
132
|
-
batch_size: int =
|
|
280
|
+
batch_size: int = 16,
|
|
133
281
|
) -> List[List[float]]:
|
|
134
|
-
"""Embed multiple documents with batching
|
|
135
|
-
|
|
136
|
-
Args:
|
|
137
|
-
texts: List of text strings to embed.
|
|
138
|
-
batch_size: Number of texts per forward pass.
|
|
139
|
-
|
|
140
|
-
Returns:
|
|
141
|
-
List of embedding vectors (each normalised to unit length).
|
|
142
|
-
"""
|
|
282
|
+
"""Embed multiple documents with batching."""
|
|
143
283
|
if not texts:
|
|
144
284
|
return []
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
batch_size=batch_size,
|
|
150
|
-
convert_to_numpy=True,
|
|
151
|
-
normalize_embeddings=True,
|
|
152
|
-
show_progress_bar=len(texts) > 100,
|
|
153
|
-
)
|
|
154
|
-
return embeddings.tolist()
|
|
285
|
+
all_embeddings: List[List[float]] = []
|
|
286
|
+
for i in range(0, len(texts), batch_size):
|
|
287
|
+
all_embeddings.extend(self._encode(texts[i : i + batch_size]))
|
|
288
|
+
return all_embeddings
|
|
155
289
|
|
|
156
|
-
# Backward-compat alias used by legacy callers
|
|
157
290
|
def embed_many(self, texts: Iterable[str]) -> List[List[float]]:
|
|
158
291
|
"""Alias for :meth:`embed_documents`."""
|
|
159
292
|
return self.embed_documents(list(texts))
|
|
160
293
|
|
|
161
294
|
|
|
162
295
|
# ===================================================================
|
|
163
|
-
# HashEmbeddingModel (
|
|
296
|
+
# HashEmbeddingModel (Zero-dependency fallback)
|
|
164
297
|
# ===================================================================
|
|
165
298
|
|
|
166
299
|
class HashEmbeddingModel:
|
|
167
|
-
"""Deterministic token-hashing embedder
|
|
300
|
+
"""Deterministic token-hashing embedder — no ML dependencies.
|
|
168
301
|
|
|
169
|
-
Provides basic keyword-level similarity.
|
|
170
|
-
|
|
302
|
+
Provides basic keyword-level similarity. Used as the default when
|
|
303
|
+
``torch``/``transformers`` are not installed or when ``hash`` is
|
|
304
|
+
selected via ``cg set-embedding hash``.
|
|
171
305
|
"""
|
|
172
306
|
|
|
173
307
|
def __init__(self, dim: int = 256) -> None:
|
|
@@ -189,7 +323,7 @@ class HashEmbeddingModel:
|
|
|
189
323
|
return [self.embed_text(text) for text in texts]
|
|
190
324
|
|
|
191
325
|
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
|
192
|
-
"""Alias matching the
|
|
326
|
+
"""Alias matching the TransformerEmbedder interface."""
|
|
193
327
|
return self.embed_many(texts)
|
|
194
328
|
|
|
195
329
|
|
|
@@ -198,27 +332,61 @@ class HashEmbeddingModel:
|
|
|
198
332
|
# ===================================================================
|
|
199
333
|
|
|
200
334
|
def get_embedder(
|
|
201
|
-
|
|
335
|
+
model_key: Optional[str] = None,
|
|
202
336
|
cache_dir: Optional[Path] = None,
|
|
203
337
|
device: str = "cpu",
|
|
204
|
-
) -> Union[
|
|
205
|
-
"""Return the
|
|
338
|
+
) -> Union[TransformerEmbedder, HashEmbeddingModel]:
|
|
339
|
+
"""Return the configured embedder.
|
|
340
|
+
|
|
341
|
+
Resolution order:
|
|
342
|
+
|
|
343
|
+
1. Explicit ``model_key`` argument.
|
|
344
|
+
2. ``[embeddings].model`` from ``~/.codegraph/config.toml``.
|
|
345
|
+
3. ``"hash"`` (zero-dependency fallback).
|
|
206
346
|
|
|
207
|
-
|
|
208
|
-
|
|
347
|
+
If a transformer model is configured but ``torch``/``transformers``
|
|
348
|
+
are missing, falls back to hash with a warning.
|
|
209
349
|
"""
|
|
350
|
+
if model_key is None:
|
|
351
|
+
try:
|
|
352
|
+
from .config_manager import load_embedding_config
|
|
353
|
+
emb_cfg = load_embedding_config()
|
|
354
|
+
model_key = emb_cfg.get("model", None)
|
|
355
|
+
except Exception:
|
|
356
|
+
model_key = None
|
|
357
|
+
|
|
358
|
+
# Default to hash if nothing configured
|
|
359
|
+
if model_key is None:
|
|
360
|
+
model_key = DEFAULT_MODEL
|
|
361
|
+
|
|
362
|
+
# Hash path — no ML needed
|
|
363
|
+
if model_key == "hash":
|
|
364
|
+
return HashEmbeddingModel()
|
|
365
|
+
|
|
366
|
+
# Unknown model guard
|
|
367
|
+
if model_key not in EMBEDDING_MODELS:
|
|
368
|
+
logger.warning(
|
|
369
|
+
"Unknown embedding model '%s' — falling back to hash.", model_key,
|
|
370
|
+
)
|
|
371
|
+
return HashEmbeddingModel()
|
|
372
|
+
|
|
373
|
+
spec = EMBEDDING_MODELS[model_key]
|
|
374
|
+
if spec["hf_id"] is None:
|
|
375
|
+
return HashEmbeddingModel()
|
|
376
|
+
|
|
377
|
+
# Transformer path — check dependencies
|
|
210
378
|
try:
|
|
211
|
-
import
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
cache_dir=cache_dir,
|
|
215
|
-
device=device,
|
|
379
|
+
import torch # noqa: F401
|
|
380
|
+
import transformers # noqa: F401
|
|
381
|
+
return TransformerEmbedder(
|
|
382
|
+
model_key=model_key, cache_dir=cache_dir, device=device,
|
|
216
383
|
)
|
|
217
384
|
except ImportError:
|
|
218
385
|
logger.warning(
|
|
219
|
-
"
|
|
220
|
-
"
|
|
221
|
-
"
|
|
386
|
+
"Embedding model '%s' requires torch + transformers. "
|
|
387
|
+
"Falling back to hash embeddings. Install with: "
|
|
388
|
+
"pip install codegraph-cli[embeddings]",
|
|
389
|
+
model_key,
|
|
222
390
|
)
|
|
223
391
|
return HashEmbeddingModel()
|
|
224
392
|
|
codegraph_cli/orchestrator.py
CHANGED
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing import Dict, List
|
|
7
7
|
|
|
8
8
|
from .agents import GraphAgent, RAGAgent, SummarizationAgent
|
|
9
|
-
from .embeddings import
|
|
9
|
+
from .embeddings import get_embedder
|
|
10
10
|
from .llm import LocalLLM
|
|
11
11
|
from .models import ImpactReport, SearchResult
|
|
12
12
|
from .rag import RAGRetriever
|
|
@@ -25,7 +25,7 @@ class MCPOrchestrator:
|
|
|
25
25
|
llm_endpoint: str | None = None,
|
|
26
26
|
):
|
|
27
27
|
self.store = store
|
|
28
|
-
self.embedding_model =
|
|
28
|
+
self.embedding_model = get_embedder()
|
|
29
29
|
self.graph_agent = GraphAgent(store, self.embedding_model)
|
|
30
30
|
self.rag_agent = RAGAgent(RAGRetriever(store, self.embedding_model))
|
|
31
31
|
self.summarization_agent = SummarizationAgent(
|
codegraph_cli/rag.py
CHANGED
|
@@ -11,7 +11,7 @@ import json
|
|
|
11
11
|
import logging
|
|
12
12
|
from typing import Any, Dict, List, Optional, Union
|
|
13
13
|
|
|
14
|
-
from .embeddings import HashEmbeddingModel,
|
|
14
|
+
from .embeddings import HashEmbeddingModel, TransformerEmbedder, cosine_similarity
|
|
15
15
|
from .models import SearchResult
|
|
16
16
|
from .storage import GraphStore
|
|
17
17
|
|
|
@@ -29,14 +29,14 @@ class RAGRetriever:
|
|
|
29
29
|
cosine similarity in Python.
|
|
30
30
|
|
|
31
31
|
The ``embedding_model`` argument accepts either a
|
|
32
|
-
:class:`~codegraph_cli.embeddings.
|
|
32
|
+
:class:`~codegraph_cli.embeddings.TransformerEmbedder` or the lightweight
|
|
33
33
|
:class:`~codegraph_cli.embeddings.HashEmbeddingModel`.
|
|
34
34
|
"""
|
|
35
35
|
|
|
36
36
|
def __init__(
|
|
37
37
|
self,
|
|
38
38
|
store: GraphStore,
|
|
39
|
-
embedding_model: Union[
|
|
39
|
+
embedding_model: Union[TransformerEmbedder, HashEmbeddingModel, Any],
|
|
40
40
|
) -> None:
|
|
41
41
|
self.store = store
|
|
42
42
|
self.embedding_model = embedding_model
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codegraph-cli
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.1.0
|
|
4
4
|
Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
|
|
5
|
-
Author-email: Ali Nasir <
|
|
5
|
+
Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/al1-nasir/codegraph-cli
|
|
8
8
|
Project-URL: Documentation, https://github.com/al1-nasir/codegraph-cli#readme
|
|
@@ -31,7 +31,6 @@ Requires-Dist: typer<1.0.0,>=0.12.0
|
|
|
31
31
|
Requires-Dist: toml>=0.10.2
|
|
32
32
|
Requires-Dist: lancedb>=0.4.0
|
|
33
33
|
Requires-Dist: pyarrow>=14.0.0
|
|
34
|
-
Requires-Dist: sentence-transformers>=2.2.0
|
|
35
34
|
Requires-Dist: tree-sitter>=0.24.0
|
|
36
35
|
Requires-Dist: tree-sitter-python>=0.23.0
|
|
37
36
|
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
@@ -45,9 +44,13 @@ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
|
45
44
|
Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
|
|
46
45
|
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
47
46
|
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
47
|
+
Provides-Extra: embeddings
|
|
48
|
+
Requires-Dist: torch>=2.0.0; extra == "embeddings"
|
|
49
|
+
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
|
|
48
50
|
Provides-Extra: all
|
|
49
51
|
Requires-Dist: crewai>=0.80.0; extra == "all"
|
|
50
|
-
Requires-Dist:
|
|
52
|
+
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
53
|
+
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
|
|
51
54
|
Dynamic: license-file
|
|
52
55
|
|
|
53
56
|
# CodeGraph CLI
|
|
@@ -1,33 +1,33 @@
|
|
|
1
|
-
codegraph_cli/__init__.py,sha256=
|
|
2
|
-
codegraph_cli/agents.py,sha256=
|
|
1
|
+
codegraph_cli/__init__.py,sha256=qTFuIhMU-qKms6nhobwg3YUgDBKR0JenO_3Pq5VgHEk,78
|
|
2
|
+
codegraph_cli/agents.py,sha256=i4VpklF2WLgpS7bmCPcH5lAzohxErZLP5wvssmEK38w,7010
|
|
3
3
|
codegraph_cli/bug_detector.py,sha256=soT4luB5eQx6qrU5rgFCsG44rdo9jRpV0hn-b0f3LPo,16419
|
|
4
4
|
codegraph_cli/chat_agent.py,sha256=dbkEY3zaPJh0ztYaVkCwkTw5zSLGArHkChC_6JWOneg,13685
|
|
5
5
|
codegraph_cli/chat_session.py,sha256=GVey-hnfsa9fa6k2PY1sgy1wtrYSUHKE5cJDV2hG-tg,7038
|
|
6
|
-
codegraph_cli/cli.py,sha256=
|
|
7
|
-
codegraph_cli/cli_chat.py,sha256=
|
|
6
|
+
codegraph_cli/cli.py,sha256=eEzH4TOgyMAFJpVhh2hU0MD2oh61s1hBomeSFx3I3qE,11199
|
|
7
|
+
codegraph_cli/cli_chat.py,sha256=8vk0zrhFQ6MGUa4KomZfnlBXN-Tw-D6aWjqeQPVFxL8,14172
|
|
8
8
|
codegraph_cli/cli_diagnose.py,sha256=gT4qHayC_uWRMsr1Tf92BCFJfRcXAMq8XdEImatrSkU,4260
|
|
9
9
|
codegraph_cli/cli_refactor.py,sha256=_u5RvsF3-KV5C_QnErA4sowlkIAmlxSeLeWKBmSusCI,8176
|
|
10
|
-
codegraph_cli/cli_setup.py,sha256=
|
|
10
|
+
codegraph_cli/cli_setup.py,sha256=f8KdcE0Tf9HQ_ewQm1R_4OZ91bOmi0kuM8eQ05Vs7is,24749
|
|
11
11
|
codegraph_cli/cli_test.py,sha256=ZFPIRhbZ9YYIuSWJyPYLi9PEdHZAI9h8FkWXXRYfqcw,5561
|
|
12
12
|
codegraph_cli/cli_v2.py,sha256=iuw3h5gtvsTg5SdUFXSdLx1Ttiq-oUDM7ZugMqMfETg,9465
|
|
13
13
|
codegraph_cli/codegen_agent.py,sha256=F73YZIIVgE5pOvJsKBl0cv22VW3rP_SGj2viwZS-rqE,9193
|
|
14
|
-
codegraph_cli/config.py,sha256=
|
|
15
|
-
codegraph_cli/config_manager.py,sha256=
|
|
16
|
-
codegraph_cli/context_manager.py,sha256=
|
|
14
|
+
codegraph_cli/config.py,sha256=rOq4lDvqmoly1pfEukzPeCUb76BMqK7cUbzDSFHhsC8,1291
|
|
15
|
+
codegraph_cli/config_manager.py,sha256=K81Ca7jHzHlwxoJsSeRezl8V-iGGJD_IEGE7ZWo3eG0,11422
|
|
16
|
+
codegraph_cli/context_manager.py,sha256=qEKjI7llcLX9y8NFTDs3aiHDm7nDF9jTbhu3tHHOk6w,16824
|
|
17
17
|
codegraph_cli/crew_agents.py,sha256=PKb0skEmxBy2_Ryq67XccmPizLFLgPNs43xOqjSHcGM,6006
|
|
18
18
|
codegraph_cli/crew_chat.py,sha256=tdo8Zf9lOp5-XMdvNntLxo5hKrDcEOe3165cH22-nvQ,6149
|
|
19
19
|
codegraph_cli/crew_tools.py,sha256=wg39mkPDHeE2Wuy3q54xl5d243MHQZBcXCWR2rHrd3M,19339
|
|
20
20
|
codegraph_cli/diff_engine.py,sha256=VGwPG_pZFVz8lGuVHZz_0nhrDocglugw6TumMmnHdTY,8968
|
|
21
|
-
codegraph_cli/embeddings.py,sha256=
|
|
21
|
+
codegraph_cli/embeddings.py,sha256=YoR6OjiIFC628EnLhNWbw2-_YWqtxSlL--tNWHGsKRk,14611
|
|
22
22
|
codegraph_cli/graph_export.py,sha256=gPyRrOc4_gnW-JaHmmp2pAD60PiZIj_uYA6b0xfU5O0,4562
|
|
23
23
|
codegraph_cli/llm.py,sha256=RpGjJKhUvejmtCHTb9FpGInwPtfaEkHBChBSBTwxUUo,23170
|
|
24
24
|
codegraph_cli/models.py,sha256=o6Wlu8TtWEPDWgq0AhB1xJtxzVfViBMQoCW_4AS29p0,794
|
|
25
25
|
codegraph_cli/models_v2.py,sha256=8zS16hT4SlIahMBwDZ7j4I8fdm3YyWv5qD0urJv1LsI,5521
|
|
26
|
-
codegraph_cli/orchestrator.py,sha256=
|
|
26
|
+
codegraph_cli/orchestrator.py,sha256=AguYRsZ-xu-biM3-uZMhRf6QaoQEqSlGxC7eL3fZqXE,1790
|
|
27
27
|
codegraph_cli/parser.py,sha256=vtKOwirs30O9UxJ6siHzvEWLx4-PxMn5dAfhb42QBG4,29193
|
|
28
28
|
codegraph_cli/performance_analyzer.py,sha256=f9PNMZQ_8jWvzs4osPYgTW2eOsvDytIRmfWWO5DuWCs,10090
|
|
29
29
|
codegraph_cli/project_context.py,sha256=9tSEDEPRmfEQfLcyWXjPa8IGFC1sZI1ysochoxrm4y0,7672
|
|
30
|
-
codegraph_cli/rag.py,sha256=
|
|
30
|
+
codegraph_cli/rag.py,sha256=DTijL8uZjdEeShQHWFtC_EkoINRgzT3Cr_oHuHHQcfA,7125
|
|
31
31
|
codegraph_cli/refactor_agent.py,sha256=ktQyhUn5YjhbXt7IVgKV7JgSZaT4AivWLLpMlZ7NLXw,16657
|
|
32
32
|
codegraph_cli/security_scanner.py,sha256=rPf8PcYMBllco4PkrxfILJEqKaj1UuEKqCupVycKpo8,15681
|
|
33
33
|
codegraph_cli/storage.py,sha256=XR_w6nJ_ge4r72bfxuuY8Zt8qi8CtHm4EE268EB5kBE,14340
|
|
@@ -35,9 +35,9 @@ codegraph_cli/testgen_agent.py,sha256=rqlKbLeEnjfzAZhQUXqLPwFKwRIpiHriTPxVgPCuR_
|
|
|
35
35
|
codegraph_cli/validation_engine.py,sha256=pzoRH_b06gWfiDZ5Yiecf0SWDWs4oJ66JokggGZZbaw,9029
|
|
36
36
|
codegraph_cli/vector_store.py,sha256=qbIBVDoNOha8JgZwrk7_Jdb7RMYUnBLphJfmqQdrVN4,9912
|
|
37
37
|
codegraph_cli/templates/graph_interactive.html,sha256=PFpU69DbY-Vkcu5UTiqOva_LrZjN2erdz7VXPgNSt6Q,7813
|
|
38
|
-
codegraph_cli-2.
|
|
39
|
-
codegraph_cli-2.
|
|
40
|
-
codegraph_cli-2.
|
|
41
|
-
codegraph_cli-2.
|
|
42
|
-
codegraph_cli-2.
|
|
43
|
-
codegraph_cli-2.
|
|
38
|
+
codegraph_cli-2.1.0.dist-info/licenses/LICENSE,sha256=3PiQTjpJW4DDJz8k5pk-WqX9TrVQD3fNrVNzbTEyW-A,1066
|
|
39
|
+
codegraph_cli-2.1.0.dist-info/METADATA,sha256=p1GW3UN_gchcHGMyx7_mMT9zZpGkTNbHEcR2DBst6xg,11183
|
|
40
|
+
codegraph_cli-2.1.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
41
|
+
codegraph_cli-2.1.0.dist-info/entry_points.txt,sha256=_p5CutxbiWjGVTx9GPeYJ30XOblccdf7SCCNtCkPnaA,45
|
|
42
|
+
codegraph_cli-2.1.0.dist-info/top_level.txt,sha256=XKmdlLsrhdgVW-pN4vzdo-ZTl-9_Rk94SXcM2YRAmHk,14
|
|
43
|
+
codegraph_cli-2.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|