codetree-rag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codetree/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """
2
+ CodeTree - Vectorless RAG for Code Repositories
3
+
4
+ Navigate your codebase like a human expert using LLM reasoning.
5
+ """
6
+
7
+ from .core import CodeTree
8
+ from .indexer import CodeIndexer
9
+ from .retriever import CodeRetriever
10
+ from .config import Config
11
+
12
# Version string of the package.
__version__ = "0.1.0"

# Names exported by ``from codetree import *``.
__all__ = [
    "CodeTree",
    "CodeIndexer",
    "CodeRetriever",
    "Config",
]
codetree/cli.py ADDED
@@ -0,0 +1,220 @@
1
+ """Command-line interface for CodeTree."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import click
7
+ from rich.console import Console
8
+ from rich.panel import Panel
9
+ from rich.syntax import Syntax
10
+ from rich.tree import Tree as RichTree
11
+
12
+ from .core import CodeTree
13
+ from .config import Config
14
+
15
+ console = Console()
16
+
17
+
18
# Root command group; the sub-commands below attach themselves via
# ``@main.command()``. The docstring doubles as the ``--help`` text.
@click.group()
@click.version_option(version="0.1.0")
def main():
    """🌲 CodeTree - Vectorless RAG for Code Repositories"""
23
+
24
+
25
@main.command()
@click.argument("repo_path", type=click.Path(exists=True), default=".")
@click.option("--output", "-o", type=click.Path(), help="Output path for index JSON")
def index(repo_path: str, output: str | None):
    """Build an index for a code repository.

    Args:
        repo_path: Directory to index (defaults to the current directory).
        output: Optional explicit destination for the index JSON. When it is
            missing or empty, the index is written to the repository's
            default ``.codetree/index.json`` location by ``build_index``.
    """
    repo_root = Path(repo_path).resolve()

    # One flag drives both the save decision and the message below, so an
    # empty ``--output ""`` can no longer skip saving while still reporting
    # that the default index file was written.
    use_default_location = not output

    console.print("\n🌲 [bold]CodeTree[/bold] - Indexing repository...")
    console.print(f" Path: [cyan]{repo_root}[/cyan]\n")

    with console.status("[bold green]Parsing code..."):
        code_tree = CodeTree(repo_root)
        built = code_tree.build_index(save=use_default_location)

    # Print stats
    summary = (
        f"[green]āœ“[/green] Indexed [bold]{built.total_files}[/bold] files\n"
        f"[green]āœ“[/green] [bold]{built.total_lines:,}[/bold] total lines\n"
        f"[green]āœ“[/green] Languages: {', '.join(built.languages.keys())}"
    )
    console.print(Panel.fit(summary, title="Index Complete", border_style="green"))

    if use_default_location:
        console.print(f"\nšŸ“ Index saved to: [cyan]{repo_root / '.codetree' / 'index.json'}[/cyan]")
        return

    output_path = Path(output)
    code_tree.indexer.save_index(built, output_path)
    console.print(f"\nšŸ“ Index saved to: [cyan]{output_path}[/cyan]")
54
+
55
+
56
@main.command()
@click.argument("question")
@click.option("--repo", "-r", type=click.Path(exists=True), default=".", help="Repository path")
def query(question: str, repo: str):
    """Query the codebase with a natural language question."""
    # Imported locally so this command is self-contained; ``rich`` is already
    # a dependency of this module.
    from rich.markup import escape

    repo_path = Path(repo).resolve()
    tree = CodeTree(repo_path)

    # Build the index on demand so a first-time query still works.
    if tree.index is None:
        console.print("[yellow]No index found. Building index first...[/yellow]\n")
        with console.status("[bold green]Indexing..."):
            tree.build_index()

    # The question is free-form user text: escape it so fragments such as
    # ``arr[i]`` are printed literally instead of being read as console markup.
    console.print(f"\nšŸ” [bold]Question:[/bold] {escape(question)}\n")

    with console.status("[bold green]Thinking..."):
        answer = tree.query(question)

    # The answer may quote source code containing square brackets; without
    # escaping, a tag-like fragment would be dropped and an unmatched closing
    # tag such as ``[/x]`` would raise a markup error.
    console.print(Panel(escape(answer), title="Answer", border_style="blue"))
76
+
77
+
78
@main.command()
@click.option("--repo", "-r", type=click.Path(exists=True), default=".", help="Repository path")
def chat(repo: str):
    """Interactive chat mode for querying the codebase."""
    # Imported locally so this command is self-contained; ``rich`` is already
    # a dependency of this module.
    from rich.markup import escape

    repo_path = Path(repo).resolve()
    tree = CodeTree(repo_path)

    # Build the index on demand so the session can start immediately.
    if tree.index is None:
        console.print("[yellow]No index found. Building index first...[/yellow]\n")
        with console.status("[bold green]Indexing..."):
            tree.build_index()

    console.print(Panel.fit(
        "🌲 [bold]CodeTree Interactive Mode[/bold]\n\n"
        f"Repository: [cyan]{repo_path.name}[/cyan]\n"
        f"Files indexed: [green]{tree.index.total_files}[/green]\n\n"
        "Type your questions about the code.\n"
        "Commands: [dim]/tree, /stats, /find <symbol>, /quit[/dim]",
        border_style="green",
    ))

    while True:
        try:
            question = console.input("\n[bold cyan]You:[/bold cyan] ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session cleanly.
            console.print("\n\nšŸ‘‹ Goodbye!")
            break

        if not question:
            continue

        # Slash commands are matched case-insensitively; lower-case once.
        command = question.lower()

        if command in ("/quit", "/exit", "/q"):
            console.print("\nšŸ‘‹ Goodbye!")
            break

        if command == "/tree":
            console.print("\n" + tree.tree(max_depth=3))
            continue

        if command == "/stats":
            stats = tree.stats()
            console.print(Panel.fit(
                f"Files: [bold]{stats['total_files']}[/bold]\n"
                f"Lines: [bold]{stats['total_lines']:,}[/bold]\n"
                f"Languages: {', '.join(f'{k}({v})' for k, v in stats['languages'].items())}",
                title="Repository Stats",
            ))
            continue

        if command == "/find":
            # A bare "/find" has nothing to search for; show usage instead of
            # forwarding the command text to the LLM as a question.
            console.print("[yellow]Usage: /find <symbol>[/yellow]")
            continue

        if command.startswith("/find "):
            symbol = question[6:].strip()
            refs = tree.find(symbol)
            shown_symbol = escape(symbol)
            if refs:
                console.print(f"\nšŸ“ Found [bold]{len(refs)}[/bold] references to '{shown_symbol}':\n")
                # Only the first 20 hits are listed.
                for ref in refs[:20]:
                    # Escape the bracketed label: an unescaped "[function]"
                    # is parsed as a markup tag and disappears from the output.
                    label = escape(f"[{ref['type']}]")
                    location = escape(str(ref['file']))
                    detail = escape(str(ref.get('name', ref.get('statement', ''))))
                    console.print(f" • {label} {location}: {detail}")
            else:
                console.print(f"\n[yellow]No references found for '{shown_symbol}'[/yellow]")
            continue

        # Regular question
        with console.status("[bold green]Thinking..."):
            answer = tree.query(question)

        # Answers may quote code with square brackets; print them literally.
        console.print(f"\n[bold green]CodeTree:[/bold green]\n{escape(answer)}")
144
+
145
+
146
@main.command()
@click.option("--repo", "-r", type=click.Path(exists=True), default=".", help="Repository path")
@click.option("--depth", "-d", type=int, default=3, help="Maximum tree depth")
def tree(repo: str, depth: int):
    """Show the code tree structure."""
    root = Path(repo).resolve()
    code_tree = CodeTree(root)

    # No saved index yet: build one before rendering.
    index_missing = code_tree.index is None
    if index_missing:
        console.print("[yellow]No index found. Building index first...[/yellow]\n")
        with console.status("[bold green]Indexing..."):
            code_tree.build_index()

    # Heading first, then the rendered tree limited to ``depth`` levels.
    console.print(f"\n🌲 [bold]Code Tree:[/bold] {root.name}\n")
    rendered = code_tree.tree(max_depth=depth)
    console.print(rendered)
162
+
163
+
164
@main.command()
@click.argument("symbol")
@click.option("--repo", "-r", type=click.Path(exists=True), default=".", help="Repository path")
def find(symbol: str, repo: str):
    """Find references to a symbol in the codebase."""
    # Imported locally so this command is self-contained; ``rich`` is already
    # a dependency of this module.
    from rich.markup import escape

    repo_path = Path(repo).resolve()
    ct = CodeTree(repo_path)

    # Build the index on demand so the lookup works on a fresh checkout.
    if ct.index is None:
        console.print("[yellow]No index found. Building index first...[/yellow]\n")
        with console.status("[bold green]Indexing..."):
            ct.build_index()

    refs = ct.find(symbol)

    # Everything interpolated below comes from user input or from the indexed
    # repository. Escape it so bracketed text (for example a file named
    # "pages/[id].tsx") is shown literally instead of being parsed as markup.
    shown_symbol = escape(symbol)

    if not refs:
        console.print(f"\n[yellow]No references found for '{shown_symbol}'[/yellow]")
        return

    console.print(f"\nšŸ“ Found [bold]{len(refs)}[/bold] references to '[cyan]{shown_symbol}[/cyan]':\n")
    for ref in refs:
        file_label = escape(str(ref['file']))
        if ref["type"] == "import":
            # Import hits carry the full statement rather than a name/line.
            console.print(f" [dim]import[/dim] {file_label}: {escape(str(ref['statement']))}")
        else:
            # The line number is optional; omit the suffix when it is absent.
            line_info = f":{ref['line']}" if ref.get('line') else ""
            console.print(f" [dim]{ref['type']:8}[/dim] {file_label}{line_info} → {escape(str(ref['name']))}")
190
+
191
+
192
@main.command()
@click.option("--repo", "-r", type=click.Path(exists=True), default=".", help="Repository path")
def stats(repo: str):
    """Show statistics about the indexed repository."""
    root = Path(repo).resolve()
    code_tree = CodeTree(root)

    # Unlike the other commands, this one does not build a missing index;
    # it reports the problem and exits with status 1.
    if code_tree.index is None:
        console.print("[red]No index found. Run 'codetree index' first.[/red]")
        sys.exit(1)

    info = code_tree.stats()

    # Assemble the panel text section by section.
    header = (
        f"[bold]Repository:[/bold] {root.name}\n"
        f"[bold]Path:[/bold] {info['repo_path']}\n\n"
    )
    counts = (
        f"[bold]Files:[/bold] {info['total_files']}\n"
        f"[bold]Lines:[/bold] {info['total_lines']:,}\n\n"
    )
    per_language = "\n".join(
        f" • {lang}: {count} files" for lang, count in info['languages'].items()
    )
    languages = "[bold]Languages:[/bold]\n" + per_language
    footer = f"\n\n[dim]Indexed at: {info['created_at']}[/dim]"

    console.print(Panel.fit(
        header + counts + languages + footer,
        title="šŸ“Š Repository Statistics",
        border_style="blue",
    ))
217
+
218
+
219
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
codetree/config.py ADDED
@@ -0,0 +1,110 @@
1
+ """Configuration management for CodeTree."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Optional
7
+ import yaml
8
+
9
+
10
@dataclass
class LLMConfig:
    """Settings for the LLM provider.

    ``api_key`` may be written as ``${VAR_NAME}``, in which case it is
    replaced by the value of that environment variable. When no key is
    available afterwards, the provider's conventional environment variable
    is consulted (``OPENAI_API_KEY`` or ``ANTHROPIC_API_KEY``).
    """
    provider: str = "openai"
    model: str = "gpt-4o"
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    temperature: float = 0.0
    max_tokens: int = 4096

    def __post_init__(self):
        key = self.api_key

        # Expand a "${VAR}" placeholder; an unset variable yields None.
        is_placeholder = bool(key) and key.startswith("${") and key.endswith("}")
        if is_placeholder:
            key = os.environ.get(key[2:-1])

        # Fall back to the provider's default variable when no key is set.
        if not key:
            provider_vars = (
                ("openai", "OPENAI_API_KEY"),
                ("anthropic", "ANTHROPIC_API_KEY"),
            )
            for provider_name, env_name in provider_vars:
                if self.provider == provider_name:
                    key = os.environ.get(env_name)
                    break

        self.api_key = key
32
+
33
+
34
@dataclass
class IndexConfig:
    """Settings that control which files are indexed."""
    # Languages enabled by default.
    languages: list[str] = field(
        default_factory=lambda: [
            "python",
            "javascript",
            "typescript",
            "go",
            "rust",
            "java",
        ]
    )
    # Default exclusion entries.
    exclude: list[str] = field(
        default_factory=lambda: [
            "node_modules",
            "__pycache__",
            ".git",
            ".venv",
            "venv",
            "dist",
            "build",
            ".egg-info",
            ".tox",
            ".pytest_cache",
        ]
    )
    # Empty by default.
    include_patterns: list[str] = field(default_factory=list)
    max_file_size: int = 100_000  # bytes
    max_files: int = 10_000
45
+
46
+
47
@dataclass
class Config:
    """Top-level configuration: LLM settings, index settings and cache dir."""
    llm: LLMConfig = field(default_factory=LLMConfig)
    index: IndexConfig = field(default_factory=IndexConfig)
    cache_dir: Path = field(default_factory=lambda: Path.home() / ".cache" / "codetree")

    @classmethod
    def load(cls, config_path: Optional[Path] = None) -> "Config":
        """Load configuration from the first config file found, else defaults.

        Search order: ``config_path`` (when given), ``.codetree.yaml`` and
        ``.codetree.yml`` in the current directory, then
        ``~/.config/codetree/config.yaml``.

        Args:
            config_path: Optional explicit file to try first. A plain string
                is accepted as well as a ``Path``.

        Returns:
            The configuration read from the first candidate that is a file,
            or a default ``Config`` when none is.
        """
        candidates = []
        if config_path:
            # Path() is a no-op for Path objects and lets callers pass a str.
            candidates.append(Path(config_path))
        candidates.extend([
            Path.cwd() / ".codetree.yaml",
            Path.cwd() / ".codetree.yml",
            Path.home() / ".config" / "codetree" / "config.yaml",
        ])

        for candidate in candidates:
            # is_file() rather than exists(): a directory that happens to
            # carry one of these names is skipped instead of failing in open().
            if candidate.is_file():
                return cls.from_yaml(candidate)

        return cls()

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Load configuration from a YAML file.

        Missing or null ``llm`` / ``index`` / ``cache_dir`` entries fall back
        to their defaults.

        Raises:
            ValueError: If the document's top level is not a mapping.
        """
        # Explicit encoding so the result does not depend on the locale.
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}

        if not isinstance(data, dict):
            raise ValueError(
                f"Configuration file {path} must contain a mapping at the top level"
            )

        # "or" (not a .get default) so that a key present with a null value,
        # e.g. a bare "llm:" line, is treated the same as a missing key.
        llm_data = data.get("llm") or {}
        index_data = data.get("index") or {}
        cache_dir = data.get("cache_dir") or (Path.home() / ".cache" / "codetree")

        return cls(
            llm=LLMConfig(**llm_data),
            index=IndexConfig(**index_data),
            cache_dir=Path(cache_dir),
        )

    def to_yaml(self, path: Path) -> None:
        """Save configuration to a YAML file, creating parent directories.

        ``api_key`` is not written, as in the original implementation
        (presumably to keep secrets out of the file - confirm with the
        maintainers). ``base_url`` is written only when it is set.
        """
        llm_section = {
            "provider": self.llm.provider,
            "model": self.llm.model,
            "temperature": self.llm.temperature,
            "max_tokens": self.llm.max_tokens,
        }
        if self.llm.base_url:
            llm_section["base_url"] = self.llm.base_url

        data = {
            "llm": llm_section,
            "index": {
                "languages": self.index.languages,
                "exclude": self.index.exclude,
                # Written so a save/load round trip keeps these settings.
                "include_patterns": self.index.include_patterns,
                "max_file_size": self.index.max_file_size,
                "max_files": self.index.max_files,
            },
            "cache_dir": str(self.cache_dir),
        }

        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False)
codetree/core.py ADDED
@@ -0,0 +1,179 @@
1
+ """Core CodeTree class - main entry point."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ from .config import Config
7
+ from .indexer import CodeIndexer, CodeIndex
8
+ from .retriever import CodeRetriever
9
+
10
+
11
class CodeTree:
    """
    Main CodeTree class for indexing and querying code repositories.

    Example usage:
        tree = CodeTree("/path/to/repo")
        tree.build_index()
        answer = tree.query("How does authentication work?")
    """

    def __init__(
        self,
        repo_path: str | Path,
        config: Optional[Config] = None,
    ):
        """
        Initialize CodeTree for a repository.

        Args:
            repo_path: Path to the repository to index
            config: Optional configuration object; ``Config.load()`` is used
                when omitted
        """
        self.repo_path = Path(repo_path).resolve()
        self.config = config or Config.load()
        self.indexer = CodeIndexer(self.config)
        self._index: Optional[CodeIndex] = None
        self._retriever: Optional[CodeRetriever] = None

        # Only the location is computed here; the index itself is loaded
        # lazily by the ``index`` property.
        self._index_path = self._get_index_path()

    def _get_index_path(self) -> Path:
        """Get the path where index should be stored."""
        # Store in repo's .codetree directory
        return self.repo_path / ".codetree" / "index.json"

    @property
    def index(self) -> Optional[CodeIndex]:
        """Get the current index, loading from disk if available."""
        if self._index is None and self._index_path.exists():
            self._index = self.indexer.load_index(self._index_path)
        return self._index

    @property
    def retriever(self) -> CodeRetriever:
        """
        Get the retriever, initializing if needed.

        Raises:
            RuntimeError: If no index has been built or saved yet
        """
        if self._retriever is None:
            if self.index is None:
                raise RuntimeError("No index available. Run build_index() first.")
            self._retriever = CodeRetriever(self.index, self.config)
        return self._retriever

    def build_index(self, save: bool = True) -> CodeIndex:
        """
        Build the code index for the repository.

        Args:
            save: Whether to save the index to disk

        Returns:
            The built CodeIndex
        """
        self._index = self.indexer.build_index(self.repo_path)
        # Drop any cached retriever so the next access uses the new index.
        self._retriever = None

        if save:
            self.indexer.save_index(self._index, self._index_path)

        return self._index

    def query(self, question: str) -> str:
        """
        Query the codebase with a natural language question.

        Args:
            question: The question to ask about the code

        Returns:
            Answer based on relevant code sections

        Raises:
            RuntimeError: If no index is available
        """
        return self.retriever.query(question)

    def find(self, symbol: str) -> list[dict]:
        """
        Find all occurrences of a symbol in the codebase.

        Args:
            symbol: Symbol name to search for (function, class, etc.)

        Returns:
            List of references found

        Raises:
            RuntimeError: If no index is available
        """
        if self.index is None:
            raise RuntimeError("No index available. Run build_index() first.")
        return self._find_references(symbol)

    def _find_references(self, symbol: str) -> list[dict]:
        """
        Find all references to a symbol across the codebase (no LLM needed).

        Matching is a case-insensitive substring test against function names,
        class names and import statements of every file node. Function and
        class entries without a usable name are skipped.

        Returns:
            One dict per hit, in tree order. Function and class hits have the
            keys ``type``, ``file``, ``name`` and ``line``; import hits have
            ``type``, ``file`` and ``statement``.
        """
        # Lower-case the needle once instead of once per candidate.
        needle = symbol.lower()
        references: list[dict] = []

        # Explicit stack instead of recursion, so very deep directory trees
        # cannot hit the interpreter's recursion limit. Children are pushed
        # in reverse so they are visited in their original order.
        pending = [self.index.root]
        while pending:
            node = pending.pop()

            if node.type != "file":
                pending.extend(reversed(list(node.children)))
                continue

            # Functions first, then classes, then imports for each file.
            for kind, entries in (("function", node.functions), ("class", node.classes)):
                for entry in entries:
                    # A missing or null name can never match; skipping it
                    # avoids a KeyError when the search string is empty.
                    name = entry.get("name")
                    if name and needle in name.lower():
                        references.append({
                            "type": kind,
                            "file": node.path,
                            "name": name,
                            "line": entry.get("line"),
                        })

            for statement in node.imports:
                if needle in statement.lower():
                    references.append({
                        "type": "import",
                        "file": node.path,
                        "statement": statement,
                    })

        return references

    def tree(self, max_depth: int = 3) -> str:
        """
        Get a text representation of the code tree.

        Args:
            max_depth: Maximum depth to display

        Returns:
            Tree structure as string

        Raises:
            RuntimeError: If no index is available
        """
        if self.index is None:
            raise RuntimeError("No index available. Run build_index() first.")
        return self.index.get_compact_tree(max_depth)

    def stats(self) -> dict:
        """
        Get statistics about the indexed repository.

        Returns:
            Dictionary with the keys ``repo_path``, ``total_files``,
            ``total_lines``, ``languages`` and ``created_at``

        Raises:
            RuntimeError: If no index is available
        """
        if self.index is None:
            raise RuntimeError("No index available. Run build_index() first.")

        return {
            "repo_path": self.index.repo_path,
            "total_files": self.index.total_files,
            "total_lines": self.index.total_lines,
            "languages": self.index.languages,
            "created_at": self.index.created_at,
        }