contextly 0.1.0__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {contextly-0.1.0 → contextly-0.1.7}/PKG-INFO +7 -12
- {contextly-0.1.0 → contextly-0.1.7}/pyproject.toml +8 -15
- contextly-0.1.7/requirements.txt +19 -0
- contextly-0.1.7/src/contextly/app.py +111 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/cli.py +49 -13
- contextly-0.1.7/src/contextly/core/__init__.py +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/core/analyzer.py +13 -6
- contextly-0.1.7/src/contextly/core/embeddings.py +129 -0
- contextly-0.1.7/src/contextly/core/sync.py +89 -0
- contextly-0.1.7/src/contextly/llm/__init__.py +13 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/manager.py +13 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/models.py +48 -15
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/ollama.py +5 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/openai.py +3 -1
- contextly-0.1.7/tests/conftest.py +33 -0
- {contextly-0.1.0 → contextly-0.1.7}/tests/test_core.py +0 -1
- contextly-0.1.7/tests/test_integration.py +170 -0
- contextly-0.1.0/src/contextly/app.py +0 -85
- contextly-0.1.0/src/contextly/core/embeddings.py +0 -109
- contextly-0.1.0/src/contextly/core/sync.py +0 -66
- {contextly-0.1.0 → contextly-0.1.7}/.gitignore +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/LICENSE +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/README.md +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/__init__.py +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/base.py +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/base.py +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/config.py +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/javascript.py +0 -0
- {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/python.py +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: contextly
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: AI Context Engine for Developers
|
|
5
|
-
Project-URL: Homepage, https://github.com/
|
|
6
|
-
Project-URL: Repository, https://github.com/
|
|
7
|
-
Project-URL: Documentation, https://github.com/
|
|
8
|
-
Project-URL: Bug Tracker, https://github.com/
|
|
9
|
-
Author-email: Contextly Team <
|
|
5
|
+
Project-URL: Homepage, https://github.com/desenyon/contextly
|
|
6
|
+
Project-URL: Repository, https://github.com/desenyon/contextly
|
|
7
|
+
Project-URL: Documentation, https://github.com/desenyon/contextly#readme
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/desenyon/contextly/issues
|
|
9
|
+
Author-email: Contextly Team <desenyon@gmail.com>
|
|
10
10
|
License-Expression: MIT
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Keywords: ai,analysis,code,context,development,documentation,llm
|
|
@@ -41,16 +41,11 @@ Requires-Dist: toml>=0.10.2
|
|
|
41
41
|
Requires-Dist: tqdm>=4.66.1
|
|
42
42
|
Requires-Dist: transformers>=4.35.0
|
|
43
43
|
Requires-Dist: typer>=0.9.0
|
|
44
|
-
Provides-Extra: dev
|
|
45
|
-
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
46
|
-
Requires-Dist: isort>=5.12.0; extra == 'dev'
|
|
47
|
-
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
48
|
-
Requires-Dist: pre-commit>=3.3.0; extra == 'dev'
|
|
49
|
-
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
50
44
|
Provides-Extra: test
|
|
51
45
|
Requires-Dist: black>=23.0.0; extra == 'test'
|
|
52
46
|
Requires-Dist: isort>=5.12.0; extra == 'test'
|
|
53
47
|
Requires-Dist: mypy>=1.5.0; extra == 'test'
|
|
48
|
+
Requires-Dist: pre-commit>=3.3.0; extra == 'test'
|
|
54
49
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
|
55
50
|
Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
|
|
56
51
|
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
@@ -4,9 +4,9 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "contextly"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.7"
|
|
8
8
|
authors = [
|
|
9
|
-
{ name = "Contextly Team", email = "
|
|
9
|
+
{ name = "Contextly Team", email = "desenyon@gmail.com" },
|
|
10
10
|
]
|
|
11
11
|
description = "AI Context Engine for Developers"
|
|
12
12
|
readme = "README.md"
|
|
@@ -58,29 +58,22 @@ test = [
|
|
|
58
58
|
"isort>=5.12.0",
|
|
59
59
|
"mypy>=1.5.0",
|
|
60
60
|
"ruff>=0.1.0",
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
dev = [
|
|
64
|
-
"pre-commit>=3.3.0",
|
|
65
|
-
"black>=23.0.0",
|
|
66
|
-
"isort>=5.12.0",
|
|
67
|
-
"mypy>=1.5.0",
|
|
68
|
-
"ruff>=0.1.0",
|
|
61
|
+
"pre-commit>=3.3.0"
|
|
69
62
|
]
|
|
70
63
|
|
|
71
64
|
[project.scripts]
|
|
72
65
|
contextly = "contextly.cli:app"
|
|
73
66
|
|
|
74
67
|
[project.urls]
|
|
75
|
-
Homepage = "https://github.com/
|
|
76
|
-
Repository = "https://github.com/
|
|
77
|
-
Documentation = "https://github.com/
|
|
78
|
-
"Bug Tracker" = "https://github.com/
|
|
68
|
+
Homepage = "https://github.com/desenyon/contextly"
|
|
69
|
+
Repository = "https://github.com/desenyon/contextly"
|
|
70
|
+
Documentation = "https://github.com/desenyon/contextly#readme"
|
|
71
|
+
"Bug Tracker" = "https://github.com/desenyon/contextly/issues"
|
|
79
72
|
|
|
80
73
|
[tool.pytest.ini_options]
|
|
81
74
|
testpaths = ["tests"]
|
|
82
75
|
python_files = ["test_*.py"]
|
|
83
|
-
addopts = "
|
|
76
|
+
addopts = "-v"
|
|
84
77
|
|
|
85
78
|
[tool.black]
|
|
86
79
|
line-length = 100
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Core dependencies
|
|
2
|
+
pyyaml>=6.0.1
|
|
3
|
+
python-dotenv>=1.0.0
|
|
4
|
+
|
|
5
|
+
# LLM and embeddings
|
|
6
|
+
openai>=1.0.0
|
|
7
|
+
langchain>=0.0.300
|
|
8
|
+
transformers>=4.34.0
|
|
9
|
+
sentence-transformers>=2.2.2
|
|
10
|
+
|
|
11
|
+
# Testing
|
|
12
|
+
pytest>=7.4.2
|
|
13
|
+
pytest-cov>=4.1.0
|
|
14
|
+
|
|
15
|
+
# Development
|
|
16
|
+
black>=23.9.1
|
|
17
|
+
isort>=5.12.0
|
|
18
|
+
flake8>=6.1.0
|
|
19
|
+
mypy>=1.5.1
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core business logic for Contextly commands.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Any, Optional
|
|
7
|
+
|
|
8
|
+
from .core.analyzer import CodeAnalyzer
|
|
9
|
+
from .core.embeddings import EmbeddingEngine
|
|
10
|
+
from .core.sync import RepoSync
|
|
11
|
+
from .llm import LLMManager
|
|
12
|
+
|
|
13
|
+
class Contextly:
|
|
14
|
+
"""Main class coordinating Contextly's functionality."""
|
|
15
|
+
|
|
16
|
+
def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
|
|
17
|
+
self.repo_path = repo_path or Path.cwd()
|
|
18
|
+
self.llm_manager = LLMManager(model)
|
|
19
|
+
self.analyzer = CodeAnalyzer(self.repo_path)
|
|
20
|
+
self.embedding_engine = EmbeddingEngine(self.repo_path)
|
|
21
|
+
self.repo_sync = RepoSync(self.repo_path)
|
|
22
|
+
|
|
23
|
+
def ask(self, question: str) -> Dict[str, Any]:
|
|
24
|
+
"""Answer questions about the codebase."""
|
|
25
|
+
try:
|
|
26
|
+
# Search for relevant code
|
|
27
|
+
search_results = self.embedding_engine.search(question)
|
|
28
|
+
|
|
29
|
+
# Build context from search results
|
|
30
|
+
context = []
|
|
31
|
+
for result in search_results['results']:
|
|
32
|
+
context.append(f"From {result['file']}:\n{result['content']}\n")
|
|
33
|
+
|
|
34
|
+
if not context:
|
|
35
|
+
return {
|
|
36
|
+
'answer': 'No relevant code found to answer the question.',
|
|
37
|
+
'context': search_results
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
# Generate answer using LLM
|
|
41
|
+
context_str = '\n'.join(context)
|
|
42
|
+
context_dict = {
|
|
43
|
+
'question': question,
|
|
44
|
+
'code_snippets': context,
|
|
45
|
+
'files': [r['file'] for r in search_results['results']]
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
answer = self.llm_manager.explain_code(context_str, context_dict)
|
|
49
|
+
|
|
50
|
+
return {
|
|
51
|
+
'answer': answer,
|
|
52
|
+
'context': search_results
|
|
53
|
+
}
|
|
54
|
+
except Exception as e:
|
|
55
|
+
return {
|
|
56
|
+
'error': f'Failed to process question: {str(e)}',
|
|
57
|
+
'context': None
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
def explain(self, location: str) -> str:
|
|
61
|
+
"""Explain code at specific location."""
|
|
62
|
+
try:
|
|
63
|
+
if ':' not in location:
|
|
64
|
+
return "Invalid location format. Use 'file:line_number'"
|
|
65
|
+
file_path, line_number = location.split(':')
|
|
66
|
+
return self.analyzer.explain_code(file_path, int(line_number))
|
|
67
|
+
except ValueError:
|
|
68
|
+
return f"Invalid line number in location: {location}"
|
|
69
|
+
except Exception as e:
|
|
70
|
+
return f"Error explaining code: {str(e)}"
|
|
71
|
+
|
|
72
|
+
def search(self, term: str) -> Dict[str, Any]:
|
|
73
|
+
"""Semantic search across codebase."""
|
|
74
|
+
try:
|
|
75
|
+
results = self.embedding_engine.search(term)
|
|
76
|
+
return {
|
|
77
|
+
'query': results['query'],
|
|
78
|
+
'results': results['results']
|
|
79
|
+
}
|
|
80
|
+
except Exception as e:
|
|
81
|
+
return {
|
|
82
|
+
'query': term,
|
|
83
|
+
'error': str(e),
|
|
84
|
+
'results': []
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
|
|
88
|
+
"""Compare configuration files."""
|
|
89
|
+
return self.analyzer.compare_configs(path1, path2)
|
|
90
|
+
|
|
91
|
+
def sync(self) -> None:
|
|
92
|
+
"""Build or rebuild the embedding index."""
|
|
93
|
+
# Scan repository
|
|
94
|
+
files = list(self.repo_sync.scan_files())
|
|
95
|
+
|
|
96
|
+
# Build index
|
|
97
|
+
index = self.repo_sync.index_repository()
|
|
98
|
+
|
|
99
|
+
# Convert index to list of documents
|
|
100
|
+
docs = []
|
|
101
|
+
for file_path, file_data in index.items():
|
|
102
|
+
docs.extend(file_data.get('chunks', []))
|
|
103
|
+
|
|
104
|
+
# Build or update the embedding index with the documents
|
|
105
|
+
try:
|
|
106
|
+
if docs:
|
|
107
|
+
self.embedding_engine.embed_documents(docs)
|
|
108
|
+
else:
|
|
109
|
+
print("No documents found to index")
|
|
110
|
+
except Exception as e:
|
|
111
|
+
raise RuntimeError(f"Failed to update search index: {str(e)}")
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Main CLI interface for Contextly
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import os
|
|
5
6
|
import typer
|
|
6
7
|
from rich import print
|
|
7
8
|
from rich.console import Console
|
|
@@ -36,17 +37,28 @@ def ask(
|
|
|
36
37
|
print("🔍 Searching for answer...")
|
|
37
38
|
result = get_contextly().ask(question)
|
|
38
39
|
|
|
40
|
+
# Print answer or error in a panel
|
|
41
|
+
if 'error' in result:
|
|
42
|
+
console.print(Panel(
|
|
43
|
+
result['error'],
|
|
44
|
+
title="❌ Error",
|
|
45
|
+
border_style="red"
|
|
46
|
+
))
|
|
47
|
+
return
|
|
48
|
+
|
|
39
49
|
# Print answer in a panel
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
+
if 'answer' in result:
|
|
51
|
+
console.print(Panel(
|
|
52
|
+
Markdown(result['answer']),
|
|
53
|
+
title="💡 Answer",
|
|
54
|
+
border_style="blue"
|
|
55
|
+
))
|
|
56
|
+
|
|
57
|
+
# Print relevant code snippets
|
|
58
|
+
if result.get('context', {}).get('results'):
|
|
59
|
+
console.print("\n📚 Relevant code:")
|
|
60
|
+
for snippet in result['context']['results']:
|
|
61
|
+
console.print("") # Add a blank line for readability
|
|
50
62
|
console.print(Panel(
|
|
51
63
|
Syntax(
|
|
52
64
|
snippet['content'],
|
|
@@ -133,9 +145,33 @@ def sync(
|
|
|
133
145
|
),
|
|
134
146
|
) -> None:
|
|
135
147
|
"""Build or rebuild the local embedding index."""
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
148
|
+
# Create fancy header
|
|
149
|
+
console.print(Panel(
|
|
150
|
+
"[bold blue]Contextly Repository Sync[/]",
|
|
151
|
+
subtitle="Building semantic search index",
|
|
152
|
+
style="blue"
|
|
153
|
+
))
|
|
154
|
+
|
|
155
|
+
contextly = get_contextly(path)
|
|
156
|
+
|
|
157
|
+
# Check model status first
|
|
158
|
+
model_name = os.getenv('CONTEXTLY_MODEL', 'codellama')
|
|
159
|
+
status = contextly.llm_manager.model_manager.registry.check_model_status(model_name)
|
|
160
|
+
|
|
161
|
+
if status['installed'] and status['ready']:
|
|
162
|
+
console.print(f"[green]✓[/] Using existing {model_name} model")
|
|
163
|
+
else:
|
|
164
|
+
console.print(f"[yellow]![/] {status['message']}")
|
|
165
|
+
console.print(f"[yellow]⚡[/] Downloading {model_name}...")
|
|
166
|
+
|
|
167
|
+
with console.status("[bold blue]📚 Analyzing repository structure...") as status:
|
|
168
|
+
try:
|
|
169
|
+
contextly.sync()
|
|
170
|
+
console.print("\n[green]✓[/] Repository indexed successfully!")
|
|
171
|
+
console.print("[dim]Run 'contextly ask' to start querying your codebase[/]")
|
|
172
|
+
except Exception as e:
|
|
173
|
+
console.print(f"\n[red]✗[/] Error: {str(e)}")
|
|
174
|
+
raise typer.Exit(1)
|
|
139
175
|
|
|
140
176
|
@model_app.command("list")
|
|
141
177
|
def list_models() -> None:
|
|
File without changes
|
|
@@ -5,10 +5,12 @@ Core functionality for code parsing and analysis.
|
|
|
5
5
|
import difflib
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import List, Dict, Any, Optional
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from
|
|
8
|
+
from typing import Type
|
|
9
|
+
from contextly.llm.manager import LLMManager
|
|
10
|
+
from contextly.parsers.base import BaseParser
|
|
11
|
+
from contextly.parsers.python import PythonParser
|
|
12
|
+
from contextly.parsers.javascript import JavaScriptParser
|
|
13
|
+
from contextly.parsers.config import ConfigParser
|
|
12
14
|
|
|
13
15
|
class Parser:
|
|
14
16
|
"""Base class for parsing different file types."""
|
|
@@ -66,8 +68,13 @@ class CodeAnalyzer:
|
|
|
66
68
|
return f"No code found at line {line_number}"
|
|
67
69
|
|
|
68
70
|
# Get explanation from LLM
|
|
69
|
-
|
|
70
|
-
|
|
71
|
+
code = target_chunk['content']
|
|
72
|
+
context = {
|
|
73
|
+
'file_type': result['file_type'],
|
|
74
|
+
'file_path': file_path,
|
|
75
|
+
'line_number': line_number
|
|
76
|
+
}
|
|
77
|
+
explanation = self.llm.explain_code(code, context)
|
|
71
78
|
|
|
72
79
|
return explanation
|
|
73
80
|
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vector embedding and similarity search functionality.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import os
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
import chromadb
|
|
9
|
+
from chromadb.config import Settings
|
|
10
|
+
from sentence_transformers import SentenceTransformer
|
|
11
|
+
|
|
12
|
+
class EmbeddingEngine:
|
|
13
|
+
"""Handles document embedding and similarity search."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, repo_path: Path):
|
|
16
|
+
"""Initialize the embedding engine with the given repository path."""
|
|
17
|
+
self.repo_path = repo_path
|
|
18
|
+
self.model: Optional[SentenceTransformer] = None
|
|
19
|
+
self.db = None
|
|
20
|
+
self.collection = None
|
|
21
|
+
self.data_dir = self.repo_path / '.contextly' / 'embeddings'
|
|
22
|
+
self.collection_name = "code_embeddings"
|
|
23
|
+
self.data_dir.mkdir(parents=True, exist_ok=True)
|
|
24
|
+
|
|
25
|
+
def _initialize(self) -> None:
|
|
26
|
+
"""Initialize the model and database if not already initialized."""
|
|
27
|
+
if self.model is None:
|
|
28
|
+
try:
|
|
29
|
+
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
30
|
+
except Exception as e:
|
|
31
|
+
raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
|
|
32
|
+
|
|
33
|
+
if self.db is None:
|
|
34
|
+
try:
|
|
35
|
+
self.db = chromadb.PersistentClient(path=str(self.data_dir))
|
|
36
|
+
self.collection = self.db.get_or_create_collection(
|
|
37
|
+
name=self.collection_name,
|
|
38
|
+
metadata={"repo_path": str(self.repo_path)}
|
|
39
|
+
)
|
|
40
|
+
except Exception as e:
|
|
41
|
+
raise RuntimeError(f"Failed to initialize ChromaDB: {str(e)}")
|
|
42
|
+
|
|
43
|
+
def _ensure_initialized(self) -> None:
|
|
44
|
+
"""Ensure both model and database are initialized."""
|
|
45
|
+
if self.model is None or self.db is None or self.collection is None:
|
|
46
|
+
self._initialize()
|
|
47
|
+
"""Initialize the embedding model and vector store if not already initialized."""
|
|
48
|
+
self._ensure_initialized()
|
|
49
|
+
|
|
50
|
+
def embed_documents(self, documents: List[Dict[str, Any]]):
|
|
51
|
+
"""Convert documents into vector embeddings and store them."""
|
|
52
|
+
if not documents:
|
|
53
|
+
return
|
|
54
|
+
|
|
55
|
+
# Ensure initialization
|
|
56
|
+
self._ensure_initialized()
|
|
57
|
+
|
|
58
|
+
# Prepare documents for ChromaDB
|
|
59
|
+
texts = []
|
|
60
|
+
metadatas = []
|
|
61
|
+
ids = []
|
|
62
|
+
|
|
63
|
+
for idx, doc in enumerate(documents):
|
|
64
|
+
# Create a meaningful document summary
|
|
65
|
+
doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
|
|
66
|
+
texts.append(doc_text)
|
|
67
|
+
|
|
68
|
+
# Store metadata
|
|
69
|
+
metadatas.append({
|
|
70
|
+
"file_path": str(doc.get("file_path", "")),
|
|
71
|
+
"type": doc.get("type", "unknown"),
|
|
72
|
+
"name": doc.get("name", ""),
|
|
73
|
+
"start_line": doc.get("start_line", 0),
|
|
74
|
+
"end_line": doc.get("end_line", 0),
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
# Generate unique ID
|
|
78
|
+
doc_id = f"doc_{idx}_{hash(doc_text) & 0xFFFFFFFF}"
|
|
79
|
+
ids.append(doc_id)
|
|
80
|
+
|
|
81
|
+
# Ensure collection is initialized
|
|
82
|
+
if self.collection is None:
|
|
83
|
+
raise RuntimeError("Collection is not initialized.")
|
|
84
|
+
# Add documents to the collection
|
|
85
|
+
self.collection.add(
|
|
86
|
+
documents=texts,
|
|
87
|
+
metadatas=metadatas,
|
|
88
|
+
ids=ids
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
|
|
92
|
+
"""Search for most similar documents to query."""
|
|
93
|
+
self._ensure_initialized()
|
|
94
|
+
|
|
95
|
+
# Ensure collection is initialized
|
|
96
|
+
if self.collection is None:
|
|
97
|
+
raise RuntimeError("Collection is not initialized.")
|
|
98
|
+
|
|
99
|
+
# Query the collection
|
|
100
|
+
results = self.collection.query(
|
|
101
|
+
query_texts=[query],
|
|
102
|
+
n_results=top_k
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Format results
|
|
106
|
+
hits = []
|
|
107
|
+
documents = results.get('documents')
|
|
108
|
+
metadatas = results.get('metadatas')
|
|
109
|
+
distances = results.get('distances')
|
|
110
|
+
ids = results.get('ids')
|
|
111
|
+
|
|
112
|
+
if documents is not None and metadatas is not None and ids is not None:
|
|
113
|
+
for i in range(len(ids[0])):
|
|
114
|
+
hits.append({
|
|
115
|
+
'content': documents[0][i] if documents[0] is not None else None,
|
|
116
|
+
'metadata': metadatas[0][i] if metadatas[0] is not None else None,
|
|
117
|
+
'score': distances[0][i] if distances and distances[0] is not None else None,
|
|
118
|
+
'file': metadatas[0][i]['file_path'] if metadatas[0] is not None else None
|
|
119
|
+
})
|
|
120
|
+
|
|
121
|
+
return {
|
|
122
|
+
'query': query,
|
|
123
|
+
'results': hits
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
def clear(self):
|
|
127
|
+
"""Clear all embeddings from storage."""
|
|
128
|
+
if self.collection:
|
|
129
|
+
self.collection.delete()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Repository synchronization and indexing functionality.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List, Dict, Any, Iterator, Tuple
|
|
7
|
+
import os
|
|
8
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
9
|
+
import multiprocessing
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
from ..parsers.python import PythonParser
|
|
13
|
+
from ..parsers.javascript import JavaScriptParser
|
|
14
|
+
from ..parsers.config import ConfigParser
|
|
15
|
+
|
|
16
|
+
class RepoSync:
|
|
17
|
+
"""Handles repository scanning and indexing."""
|
|
18
|
+
|
|
19
|
+
SUPPORTED_EXTENSIONS = {
|
|
20
|
+
# Code files
|
|
21
|
+
'.py': PythonParser,
|
|
22
|
+
'.js': JavaScriptParser,
|
|
23
|
+
'.jsx': JavaScriptParser,
|
|
24
|
+
'.ts': JavaScriptParser,
|
|
25
|
+
'.tsx': JavaScriptParser,
|
|
26
|
+
# Config files
|
|
27
|
+
'.json': ConfigParser,
|
|
28
|
+
'.yml': ConfigParser,
|
|
29
|
+
'.yaml': ConfigParser,
|
|
30
|
+
'.toml': ConfigParser,
|
|
31
|
+
'.env': ConfigParser,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
IGNORED_DIRS = {'.git', '__pycache__', 'node_modules', 'venv', '.venv', 'dist', 'build'}
|
|
35
|
+
|
|
36
|
+
def __init__(self, repo_path: Path):
|
|
37
|
+
self.repo_path = repo_path
|
|
38
|
+
self.num_workers = max(1, multiprocessing.cpu_count() - 1)
|
|
39
|
+
|
|
40
|
+
def scan_files(self) -> List[Path]:
|
|
41
|
+
"""Scan repository for supported files."""
|
|
42
|
+
files = []
|
|
43
|
+
for root, dirs, filenames in os.walk(self.repo_path):
|
|
44
|
+
# Skip ignored directories
|
|
45
|
+
dirs[:] = [d for d in dirs if d not in self.IGNORED_DIRS]
|
|
46
|
+
|
|
47
|
+
root_path = Path(root)
|
|
48
|
+
for file in filenames:
|
|
49
|
+
file_path = root_path / file
|
|
50
|
+
if file_path.suffix in self.SUPPORTED_EXTENSIONS:
|
|
51
|
+
files.append(file_path)
|
|
52
|
+
return files
|
|
53
|
+
|
|
54
|
+
def _process_file(self, file_path: Path) -> Tuple[str, Dict[str, Any]]:
|
|
55
|
+
"""Process a single file and return its index data."""
|
|
56
|
+
parser_class = self.SUPPORTED_EXTENSIONS.get(file_path.suffix)
|
|
57
|
+
if not parser_class:
|
|
58
|
+
return str(file_path), {}
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
parser = parser_class()
|
|
62
|
+
result = parser.parse(file_path)
|
|
63
|
+
# Add file path to chunks for reference
|
|
64
|
+
for chunk in result.get('chunks', []):
|
|
65
|
+
chunk['file_path'] = str(file_path)
|
|
66
|
+
return str(file_path), result
|
|
67
|
+
except Exception as e:
|
|
68
|
+
return str(file_path), {'error': str(e)}
|
|
69
|
+
|
|
70
|
+
def index_repository(self) -> Dict[str, Any]:
|
|
71
|
+
"""Build index of repository contents using parallel processing."""
|
|
72
|
+
files = self.scan_files()
|
|
73
|
+
total_files = len(files)
|
|
74
|
+
|
|
75
|
+
if total_files == 0:
|
|
76
|
+
return {}
|
|
77
|
+
|
|
78
|
+
index = {}
|
|
79
|
+
with ProcessPoolExecutor(max_workers=self.num_workers) as executor:
|
|
80
|
+
futures = [executor.submit(self._process_file, f) for f in files]
|
|
81
|
+
|
|
82
|
+
with tqdm(total=total_files, desc="Analyzing files", unit="file") as pbar:
|
|
83
|
+
for future in as_completed(futures):
|
|
84
|
+
file_path, result = future.result()
|
|
85
|
+
if result and 'error' not in result:
|
|
86
|
+
index[file_path] = result
|
|
87
|
+
pbar.update(1)
|
|
88
|
+
|
|
89
|
+
return index
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""LLM package for Contextly."""
|
|
2
|
+
|
|
3
|
+
from contextly.llm.manager import LLMManager
|
|
4
|
+
from contextly.llm.base import LLMProvider
|
|
5
|
+
from contextly.llm.models import ModelManager, ModelRegistry, ModelProvider
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
'LLMManager',
|
|
9
|
+
'LLMProvider',
|
|
10
|
+
'ModelManager',
|
|
11
|
+
'ModelRegistry',
|
|
12
|
+
'ModelProvider'
|
|
13
|
+
]
|
|
@@ -14,10 +14,13 @@ class LLMManager:
|
|
|
14
14
|
"""Manages LLM providers and generates code explanations."""
|
|
15
15
|
|
|
16
16
|
def __init__(self, model: Optional[str] = None):
|
|
17
|
+
"""Initialize the LLM manager with the given model."""
|
|
17
18
|
self.model_manager = ModelManager()
|
|
18
19
|
self.providers: Dict[str, LLMProvider] = {}
|
|
19
20
|
self.current_model = model or os.getenv('CONTEXTLY_MODEL', 'codellama')
|
|
21
|
+
self.initialized = False
|
|
20
22
|
self._initialize_providers()
|
|
23
|
+
self.initialized = True
|
|
21
24
|
|
|
22
25
|
def _initialize_providers(self) -> None:
|
|
23
26
|
"""Initialize LLM providers."""
|
|
@@ -27,6 +30,16 @@ class LLMManager:
|
|
|
27
30
|
# Add OpenAI if key is available
|
|
28
31
|
if os.getenv('OPENAI_API_KEY'):
|
|
29
32
|
self.providers['openai'] = OpenAIProvider()
|
|
33
|
+
|
|
34
|
+
# Ensure the default provider is available
|
|
35
|
+
self.ensure_default_provider()
|
|
36
|
+
|
|
37
|
+
def ensure_default_provider(self) -> None:
|
|
38
|
+
"""Ensure that a default provider is available."""
|
|
39
|
+
# Try to set up Ollama with codellama as default
|
|
40
|
+
if 'ollama' in self.providers:
|
|
41
|
+
if not self.model_manager.registry.get_model('codellama'):
|
|
42
|
+
self.model_manager.download_model('codellama', ModelProvider.OLLAMA)
|
|
30
43
|
|
|
31
44
|
def get_available_provider(self) -> Optional[LLMProvider]:
|
|
32
45
|
"""Get the appropriate provider for the current model."""
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Model management system for various LLM providers.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from dataclasses import dataclass
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from typing import Dict, Any, Optional, List
|
|
8
8
|
import requests
|
|
@@ -25,7 +25,7 @@ class ModelInfo:
|
|
|
25
25
|
description: str
|
|
26
26
|
context_length: int
|
|
27
27
|
quantization: Optional[str] = None
|
|
28
|
-
metadata: Dict[str, Any] =
|
|
28
|
+
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
29
29
|
|
|
30
30
|
class ModelRegistry:
|
|
31
31
|
"""Registry of available models and their capabilities."""
|
|
@@ -35,21 +35,54 @@ class ModelRegistry:
|
|
|
35
35
|
self.models: Dict[str, ModelInfo] = {}
|
|
36
36
|
self._load_models()
|
|
37
37
|
|
|
38
|
+
def check_model_status(self, model_name: str) -> Dict[str, Any]:
|
|
39
|
+
"""Check if a model is installed and ready to use."""
|
|
40
|
+
import subprocess
|
|
41
|
+
status = {
|
|
42
|
+
'installed': False,
|
|
43
|
+
'ready': False,
|
|
44
|
+
'message': ''
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if model_name.startswith('codellama'):
|
|
48
|
+
# Check if Ollama is running
|
|
49
|
+
try:
|
|
50
|
+
result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
|
|
51
|
+
models = result.stdout.strip().split('\n')
|
|
52
|
+
status['installed'] = any(model_name in model for model in models)
|
|
53
|
+
if status['installed']:
|
|
54
|
+
status['ready'] = True
|
|
55
|
+
status['message'] = '✅ Model is installed and ready'
|
|
56
|
+
else:
|
|
57
|
+
status['message'] = '⚠️ Model not found in Ollama'
|
|
58
|
+
except Exception:
|
|
59
|
+
status['message'] = '❌ Ollama not running or not installed'
|
|
60
|
+
return status
|
|
61
|
+
|
|
38
62
|
def _load_models(self) -> None:
|
|
39
63
|
"""Load model registry from config file."""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
64
|
+
try:
|
|
65
|
+
if self.config_path.exists():
|
|
66
|
+
with open(self.config_path, 'r') as f:
|
|
67
|
+
try:
|
|
68
|
+
data = json.load(f)
|
|
69
|
+
for model_data in data.get('models', []):
|
|
70
|
+
try:
|
|
71
|
+
info = ModelInfo(
|
|
72
|
+
name=model_data['name'],
|
|
73
|
+
provider=ModelProvider(model_data['provider']),
|
|
74
|
+
description=model_data.get('description', ''),
|
|
75
|
+
context_length=model_data.get('context_length', 4096),
|
|
76
|
+
quantization=model_data.get('quantization'),
|
|
77
|
+
metadata=model_data.get('metadata', {})
|
|
78
|
+
)
|
|
79
|
+
self.models[model_data['name']] = info
|
|
80
|
+
except (KeyError, ValueError) as e:
|
|
81
|
+
print(f"Warning: Skipping invalid model data: {e}")
|
|
82
|
+
except Exception as e:
|
|
83
|
+
print(f"Error loading models from config: {e}")
|
|
84
|
+
except Exception as e:
|
|
85
|
+
print(f"Error accessing config file: {e}")
|
|
53
86
|
|
|
54
87
|
def _save_models(self) -> None:
|
|
55
88
|
"""Save model registry to config file."""
|
|
@@ -12,6 +12,7 @@ class OllamaProvider(LLMProvider):
|
|
|
12
12
|
|
|
13
13
|
DEFAULT_MODEL = "codellama"
|
|
14
14
|
BASE_URL = "http://localhost:11434/api"
|
|
15
|
+
_model_checked = {} # Class variable to track which models have been checked
|
|
15
16
|
|
|
16
17
|
def __init__(self, model: str = DEFAULT_MODEL):
|
|
17
18
|
self.model = model
|
|
@@ -19,6 +20,9 @@ class OllamaProvider(LLMProvider):
|
|
|
19
20
|
|
|
20
21
|
def _ensure_model(self) -> None:
|
|
21
22
|
"""Ensure the model is downloaded and ready."""
|
|
23
|
+
if self.model in self._model_checked:
|
|
24
|
+
return
|
|
25
|
+
|
|
22
26
|
try:
|
|
23
27
|
# Check if model exists
|
|
24
28
|
response = requests.get(f"{self.BASE_URL}/tags")
|
|
@@ -34,6 +38,7 @@ class OllamaProvider(LLMProvider):
|
|
|
34
38
|
if pull_response.status_code != 200:
|
|
35
39
|
raise RuntimeError(f"Failed to pull model: {pull_response.text}")
|
|
36
40
|
print(f"{self.model} ready!")
|
|
41
|
+
self._model_checked[self.model] = True
|
|
37
42
|
except Exception as e:
|
|
38
43
|
raise RuntimeError(f"Failed to set up Ollama: {str(e)}")
|
|
39
44
|
|
|
@@ -10,7 +10,7 @@ from .base import LLMProvider
|
|
|
10
10
|
class OpenAIProvider(LLMProvider):
|
|
11
11
|
"""LLM provider using OpenAI API."""
|
|
12
12
|
|
|
13
|
-
DEFAULT_MODEL = "gpt-
|
|
13
|
+
DEFAULT_MODEL = "gpt-3.5-turbo"
|
|
14
14
|
|
|
15
15
|
def __init__(self, model: str = DEFAULT_MODEL, api_key: Optional[str] = None):
|
|
16
16
|
self.model = model
|
|
@@ -27,6 +27,8 @@ class OpenAIProvider(LLMProvider):
|
|
|
27
27
|
raise RuntimeError("OpenAI API is not configured")
|
|
28
28
|
|
|
29
29
|
try:
|
|
30
|
+
if self.client is None:
|
|
31
|
+
raise RuntimeError("OpenAI client is not initialized. Please provide a valid API key.")
|
|
30
32
|
response = self.client.chat.completions.create(
|
|
31
33
|
model=self.model,
|
|
32
34
|
messages=[{"role": "user", "content": prompt}],
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration and fixtures for pytest.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import pytest
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
|
|
10
|
+
def test_repo_path():
|
|
11
|
+
"""Get the path to the test repository."""
|
|
12
|
+
# Return the path to the contextly repository itself
|
|
13
|
+
return Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
14
|
+
|
|
15
|
+
@pytest.fixture(autouse=True)
|
|
16
|
+
def setup_environment():
|
|
17
|
+
"""Set up test environment variables."""
|
|
18
|
+
# Store existing environment variables
|
|
19
|
+
old_env = {}
|
|
20
|
+
if 'CONTEXTLY_MODEL' in os.environ:
|
|
21
|
+
old_env['CONTEXTLY_MODEL'] = os.environ['CONTEXTLY_MODEL']
|
|
22
|
+
|
|
23
|
+
# Set test environment variables
|
|
24
|
+
os.environ['CONTEXTLY_MODEL'] = 'codellama'
|
|
25
|
+
|
|
26
|
+
yield
|
|
27
|
+
|
|
28
|
+
# Restore environment variables
|
|
29
|
+
for key in ['CONTEXTLY_MODEL']:
|
|
30
|
+
if key in old_env:
|
|
31
|
+
os.environ[key] = old_env[key]
|
|
32
|
+
elif key in os.environ:
|
|
33
|
+
del os.environ[key]
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration tests for Contextly using its own codebase as test data.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import pytest
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from contextly.app import Contextly
|
|
9
|
+
from contextly.core.sync import RepoSync
|
|
10
|
+
from contextly.core.embeddings import EmbeddingEngine
|
|
11
|
+
from contextly.llm.manager import LLMManager
|
|
12
|
+
from contextly.llm.ollama import OllamaProvider
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
|
|
15
|
+
def repo_path():
|
|
16
|
+
"""Get the path to the Contextly repository."""
|
|
17
|
+
# Get the tests directory and go up one level
|
|
18
|
+
return Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
|
|
21
|
+
def contextly_app(repo_path):
|
|
22
|
+
"""Create a Contextly instance for testing."""
|
|
23
|
+
return Contextly(repo_path)
|
|
24
|
+
|
|
25
|
+
def test_repo_sync(repo_path):
    """Scan the repository and check that every expected core module is found.

    Fails when the scan yields no ``.py`` files, or when any of the expected
    core module file names is absent from the scan results.
    """
    scanned = RepoSync(repo_path).scan_files()

    # Path() normalises entries in case the scanner yields plain strings.
    python_files = [Path(entry) for entry in scanned if str(entry).endswith('.py')]
    assert python_files, "Should find Python files"

    expected = {'app.py', 'cli.py', 'embeddings.py', 'analyzer.py', 'sync.py'}
    found_names = {path.name for path in python_files}

    # Require every core module; a non-empty overlap would pass even if four
    # of the five were missing.
    missing = expected - found_names
    assert not missing, f"Should find core modules; missing: {sorted(missing)}"
|
|
44
|
+
|
|
45
|
+
def test_embedding_engine(repo_path):
    """Run one search through EmbeddingEngine and sanity-check the hits."""
    engine = EmbeddingEngine(repo_path)

    response = engine.search("What is the main purpose of this codebase?")
    assert response is not None
    assert 'results' in response
    assert len(response['results']) > 0

    # At least one hit should come from the README or from app.py.
    hit_files = {hit['file'] for hit in response['results']}
    is_relevant = any(
        'README.md' in str(name) or 'app.py' in str(name) for name in hit_files
    )
    assert is_relevant, "Should find relevant documentation or core files"
|
|
59
|
+
|
|
60
|
+
def test_llm_manager():
    """Exercise LLMManager with the 'codellama' model.

    Checks that the manager reports itself initialized, that it lists an
    'ollama' provider, that the available provider is an OllamaProvider
    configured for 'codellama', and that explain_code() returns a non-empty
    explanation for a small snippet.
    """
    manager = LLMManager(model='codellama')
    assert manager.initialized
    assert 'ollama' in manager.providers

    active = manager.get_available_provider()
    assert active is not None
    assert isinstance(active, OllamaProvider)

    assert active.is_available()
    assert active.model == 'codellama'

    # Small, self-contained function handed to the LLM for explanation.
    snippet = '''def greet(name: str) -> str:
    """Greet a user by name."""
    return f"Hello, {name}!"'''

    request_context = {
        'question': 'What does this function do?',
        'code_snippets': [snippet],
        'files': ['test.py'],
    }

    explanation = manager.explain_code(snippet, request_context)
    assert explanation is not None
    assert len(explanation) > 0
|
|
89
|
+
|
|
90
|
+
def test_ollama_provider():
    """Check that OllamaProvider reports available and answers a simple prompt."""
    ollama = OllamaProvider()

    assert ollama.is_available(), "Ollama should be running"

    reply = ollama.generate_response("What is a code analyzer?")
    assert reply is not None
    assert len(reply) > 0
|
|
102
|
+
|
|
103
|
+
def test_contextly_ask(contextly_app):
    """Ask several questions about the codebase and validate each result.

    Every result must carry an 'answer' or an 'error'. An answer must be
    non-empty, and a truthy 'context' must hold a non-empty 'results' entry.
    """
    questions = (
        "What is the purpose of the sync.py file?",
        "How does the embedding engine work?",
        "What LLM models are supported?",
        "How are code snippets parsed?",
    )

    for question in questions:
        outcome = contextly_app.ask(question)
        assert outcome is not None
        assert 'answer' in outcome or 'error' in outcome

        # Error-only results have nothing further to check.
        if 'answer' not in outcome:
            continue

        assert len(outcome['answer']) > 0

        context = outcome.get('context')
        if context:
            assert 'results' in context
            assert len(context['results']) > 0
|
|
123
|
+
|
|
124
|
+
def test_error_handling(contextly_app):
    """Check ask() for an empty question, a very long one, and a bad repo path."""
    # An empty question is expected to raise.
    # NOTE(review): Exception matches anything; narrow it once the concrete
    # type raised for an empty question is confirmed.
    with pytest.raises(Exception):
        contextly_app.ask("")

    # A 1000-word question must still produce a result with one of the two keys.
    oversized = "what " * 1000
    outcome = contextly_app.ask(oversized)
    assert any(key in outcome for key in ('error', 'answer'))

    # An app pointed at a path that should not exist must report an error.
    broken_app = Contextly(Path("/nonexistent/path"))
    outcome = broken_app.ask("What is this?")
    assert 'error' in outcome
|
|
139
|
+
|
|
140
|
+
def test_code_analysis(contextly_app):
    """Ask what each listed file does and require a non-empty answer each time."""
    for module_name in ('app.py', 'cli.py', 'embeddings.py'):
        outcome = contextly_app.ask(f"What does {module_name} do?")
        assert outcome is not None
        assert 'answer' in outcome
        assert len(outcome['answer']) > 0
|
|
155
|
+
|
|
156
|
+
def test_multi_file_context(contextly_app):
    """Ask cross-cutting questions and expect context drawn from several files.

    Each result must contain an 'answer'. When a truthy 'context' is present,
    its results must reference more than one distinct file.
    """
    questions = (
        "How does the CLI interface interact with the core application?",
        "What is the relationship between embeddings.py and analyzer.py?",
        "How do the different LLM providers work together?",
    )

    for question in questions:
        outcome = contextly_app.ask(question)
        assert outcome is not None
        assert 'answer' in outcome

        context = outcome.get('context')
        if not context:
            continue

        referenced = {hit['file'] for hit in context['results']}
        assert len(referenced) > 1, "Should reference multiple files for complex questions"
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Core business logic for Contextly commands.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Dict, Any, Optional
|
|
7
|
-
|
|
8
|
-
from .core.analyzer import CodeAnalyzer
|
|
9
|
-
from .core.embeddings import EmbeddingEngine
|
|
10
|
-
from .core.sync import RepoSync
|
|
11
|
-
from .llm.manager import LLMManager
|
|
12
|
-
|
|
13
|
-
class Contextly:
    """Main class coordinating Contextly's functionality.

    Holds one LLM manager, code analyzer, embedding engine and repository
    scanner, all created for the same repository path.
    """

    def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
        """Create the component objects for one repository.

        Args:
            repo_path: Repository root; the current working directory is used
                when this is omitted (or otherwise falsy).
            model: Passed unchanged to LLMManager.
        """
        self.repo_path = repo_path or Path.cwd()
        self.llm_manager = LLMManager(model)
        self.analyzer = CodeAnalyzer(self.repo_path)
        self.embedding_engine = EmbeddingEngine(self.repo_path)
        self.repo_sync = RepoSync(self.repo_path)

    def _ensure_initialized(self) -> None:
        """Initialize the embedding engine the first time it is needed."""
        if not hasattr(self, '_initialized'):
            self.embedding_engine.initialize()
            self._initialized = True

    def ask(self, question: str) -> Dict[str, Any]:
        """Answer a question about the codebase.

        Searches the embedding engine for the question, builds a prompt from
        the hits and passes it to ``self.analyzer.llm.generate``.

        Args:
            question: The question text; also used as the search query.

        Returns:
            A dict with 'answer' (the generated text) and 'context' (the raw
            search result the prompt was built from).
        """
        self._ensure_initialized()

        search_results = self.embedding_engine.search(question)

        # One "From <file>:" section per hit, concatenated in result order.
        context = "".join(
            f"From {result['file']}:\n{result['content']}\n"
            for result in search_results['results']
        )

        prompt = f"Question: {question}\n\nContext from codebase:\n{context}\n\nAnswer:"
        answer = self.analyzer.llm.generate(prompt)

        return {
            'answer': answer,
            'context': search_results,
        }

    def explain(self, location: str) -> str:
        """Explain the code at a ``path:line`` location.

        Args:
            location: File path and line number joined by a colon, e.g.
                ``src/app.py:12``. The path may itself contain colons
                (such as a Windows drive letter).

        Returns:
            Whatever ``self.analyzer.explain_code`` returns for that location.

        Raises:
            ValueError: If there is no colon in ``location`` or the part after
                the last colon is not an integer.
        """
        # Split on the LAST colon so paths like C:\repo\app.py:12 still parse.
        file_path, separator, line_number = location.rpartition(':')
        if not separator:
            raise ValueError(
                f"Expected location in the form 'path:line', got {location!r}"
            )
        return self.analyzer.explain_code(file_path, int(line_number))

    def search(self, term: str) -> Dict[str, Any]:
        """Run a semantic search across the codebase.

        Args:
            term: The search query.

        Returns:
            A dict with the 'query' and 'results' entries of the embedding
            engine's search result.
        """
        self._ensure_initialized()

        results = self.embedding_engine.search(term)
        return {
            'query': results['query'],
            'results': results['results'],
        }

    def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
        """Compare two configuration files via ``self.analyzer.compare_configs``."""
        return self.analyzer.compare_configs(path1, path2)

    def sync(self) -> None:
        """Build or rebuild the embedding index.

        Indexes the repository, collects the 'chunks' of every indexed file
        into one list and hands that list to the embedding engine.
        """
        # index_repository() performs its own file scan, so no separate
        # scan_files() pass is needed here.
        index = self.repo_sync.index_repository()

        self.embedding_engine.initialize()

        docs = [
            chunk
            for file_data in index.values()
            for chunk in file_data.get('chunks', [])
        ]

        self.embedding_engine.embed_documents(docs)
|
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Vector embedding and similarity search functionality.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
import os
|
|
7
|
-
from typing import List, Dict, Any, Optional
|
|
8
|
-
import chromadb
|
|
9
|
-
from chromadb.config import Settings
|
|
10
|
-
from sentence_transformers import SentenceTransformer
|
|
11
|
-
|
|
12
|
-
class EmbeddingEngine:
    """Handles document embedding and similarity search."""

    def __init__(self, repo_path: Path):
        """Remember the repository path; nothing is loaded until initialize().

        Args:
            repo_path: Repository root; the vector store lives in its
                ``.contextly`` subdirectory.
        """
        self.repo_path = repo_path
        self.model = None  # SentenceTransformer, loaded lazily by initialize()
        self.db = None  # ChromaDB client, created lazily by initialize()
        self.collection = None  # Collection handle, fetched by initialize()

    def initialize(self):
        """Initialize the embedding model and vector store.

        Safe to call repeatedly: each part is only created while its
        attribute is still None.
        """
        if self.model is None:
            # NOTE(review): no method of this class reads self.model; the
            # collection is queried with raw texts. Confirm it is still needed.
            self.model = SentenceTransformer('all-MiniLM-L6-v2')

        if self.db is None:
            # Create .contextly directory if it doesn't exist
            db_path = self.repo_path / '.contextly'
            os.makedirs(db_path, exist_ok=True)

            # NOTE(review): whether persist_directory alone gives on-disk
            # persistence depends on the installed chromadb version - confirm.
            self.db = chromadb.Client(Settings(
                persist_directory=str(db_path),
                anonymized_telemetry=False
            ))

        if self.collection is None:
            # Get or create collection for this repo
            self.collection = self.db.get_or_create_collection(
                name="contextly_docs",
                metadata={"repo_path": str(self.repo_path)}
            )

    def embed_documents(self, documents: List[Dict[str, Any]]):
        """Store documents in the collection.

        Args:
            documents: Chunk dicts; the keys read are 'type', 'name',
                'content', 'file_path', 'start_line' and 'end_line', each
                with a fallback when absent. An empty list is a no-op.
        """
        if not documents:
            return

        import zlib

        # Ensure initialization
        self.initialize()

        texts = []
        metadatas = []
        ids = []

        for idx, doc in enumerate(documents):
            # Text handed to the store: "<type> - <name>:" header plus content.
            doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
            texts.append(doc_text)

            metadatas.append({
                "file_path": str(doc.get("file_path", "")),
                "type": doc.get("type", "unknown"),
                "name": doc.get("name", ""),
                "start_line": doc.get("start_line", 0),
                "end_line": doc.get("end_line", 0),
            })

            # crc32 instead of the built-in hash(): str hashes are salted per
            # process, so hash()-based ids changed on every run.
            checksum = zlib.crc32(doc_text.encode('utf-8'))
            ids.append(f"doc_{idx}_{checksum}")

        # With stable ids a repeated sync hits existing ids; prefer upsert
        # when the collection offers it, otherwise fall back to add.
        store = getattr(self.collection, 'upsert', None) or self.collection.add
        store(
            documents=texts,
            metadatas=metadatas,
            ids=ids
        )

    def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
        """Search for the documents most similar to a query.

        Args:
            query: The query text.
            top_k: Number of results requested from the collection.

        Returns:
            A dict with 'query' and 'results'; each result has 'content',
            'metadata', 'score' (the reported distance, or None when the
            collection returned none) and 'file' (the metadata 'file_path').
        """
        self.initialize()

        results = self.collection.query(
            query_texts=[query],
            n_results=top_k
        )

        # Only one query text is sent, so only the first result row is read.
        documents = results['documents'][0]
        metadatas = results['metadatas'][0]
        distances = results.get('distances')
        scores = distances[0] if distances else [None] * len(results['ids'][0])

        hits = [
            {
                'content': content,
                'metadata': metadata,
                'score': score,
                'file': metadata['file_path'],
            }
            for content, metadata, score in zip(documents, metadatas, scores)
        ]

        return {
            'query': query,
            'results': hits
        }

    def clear(self):
        """Clear all embeddings from storage.

        Does nothing when no collection has been opened yet.
        """
        if self.collection is None:
            return
        # Delete by explicit ids; presumably a selector-less delete() is
        # rejected by some chromadb versions - TODO confirm.
        stored_ids = self.collection.get()['ids']
        if stored_ids:
            self.collection.delete(ids=stored_ids)
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Repository synchronization and indexing functionality.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import List, Dict, Any, Iterator
|
|
7
|
-
import os
|
|
8
|
-
|
|
9
|
-
from ..parsers.python import PythonParser
|
|
10
|
-
from ..parsers.javascript import JavaScriptParser
|
|
11
|
-
from ..parsers.config import ConfigParser
|
|
12
|
-
|
|
13
|
-
class RepoSync:
    """Handles repository scanning and indexing."""

    # Extensions picked up by scan_files(). Covers every extension that
    # index_repository() has a parser for, plus '.md' and '.txt', which are
    # scanned but have no parser.
    SUPPORTED_EXTENSIONS = {
        '.py', '.js', '.jsx', '.ts', '.tsx', '.json', '.yml', '.yaml',
        '.env', '.toml', '.md', '.txt'
    }

    def __init__(self, repo_path: Path):
        """Remember the repository root to scan."""
        self.repo_path = repo_path

    @staticmethod
    def _extension_of(file_path: Path) -> str:
        """Return the extension used for the supported/parser lookups.

        pathlib reports an empty suffix for a dotfile such as ``.env``, so a
        name that starts with a dot and has no suffix is used as its own
        extension.
        """
        if file_path.suffix:
            return file_path.suffix
        name = file_path.name
        return name if name.startswith('.') else ''

    def scan_files(self) -> Iterator[Path]:
        """Yield every supported file under the repository, skipping ``.git``.

        Yields:
            Paths (root joined with file name) whose extension is in
            SUPPORTED_EXTENSIONS.
        """
        for root, dirs, files in os.walk(self.repo_path):
            root_path = Path(root)
            if '.git' in root_path.parts:
                # Already inside .git (e.g. the scan root itself): skip it
                # and do not descend further.
                dirs[:] = []
                continue

            # Prune in place so os.walk never descends into .git at all.
            dirs[:] = [name for name in dirs if name != '.git']

            for file in files:
                file_path = root_path / file
                if self._extension_of(file_path) in self.SUPPORTED_EXTENSIONS:
                    yield file_path

    def index_repository(self) -> Dict[str, Any]:
        """Build an index of repository contents.

        Parses each scanned file that has a parser for its extension. Files
        whose parsing raises are reported on stdout and left out.

        Returns:
            A dict mapping ``str(file_path)`` to that file's parse result;
            each chunk in a result's 'chunks' gets a 'file_path' entry.
        """
        index = {}
        parsers = {
            '.py': PythonParser(),
            '.js': JavaScriptParser(),
            '.jsx': JavaScriptParser(),
            '.ts': JavaScriptParser(),
            '.tsx': JavaScriptParser(),
            '.json': ConfigParser(),
            '.yml': ConfigParser(),
            '.yaml': ConfigParser(),
            '.toml': ConfigParser(),
            '.env': ConfigParser(),
        }

        for file_path in self.scan_files():
            parser = parsers.get(self._extension_of(file_path))
            if not parser:
                continue

            try:
                result = parser.parse(file_path)
                # Add file path to chunks for reference
                for chunk in result.get('chunks', []):
                    chunk['file_path'] = str(file_path)
                index[str(file_path)] = result
            except Exception as e:
                # Best effort: one unparsable file must not abort the index.
                print(f"Error parsing {file_path}: {e}")

        return index
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|