contextly 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextly/__init__.py +5 -0
- contextly/app.py +85 -0
- contextly/cli.py +201 -0
- contextly/core/analyzer.py +111 -0
- contextly/core/embeddings.py +109 -0
- contextly/core/sync.py +66 -0
- contextly/llm/base.py +19 -0
- contextly/llm/manager.py +126 -0
- contextly/llm/models.py +204 -0
- contextly/llm/ollama.py +73 -0
- contextly/llm/openai.py +39 -0
- contextly/parsers/base.py +39 -0
- contextly/parsers/config.py +79 -0
- contextly/parsers/javascript.py +122 -0
- contextly/parsers/python.py +60 -0
- contextly-0.1.0.dist-info/METADATA +209 -0
- contextly-0.1.0.dist-info/RECORD +20 -0
- contextly-0.1.0.dist-info/WHEEL +4 -0
- contextly-0.1.0.dist-info/entry_points.txt +2 -0
- contextly-0.1.0.dist-info/licenses/LICENSE +21 -0
contextly/__init__.py
ADDED
contextly/app.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core business logic for Contextly commands.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Any, Optional
|
|
7
|
+
|
|
8
|
+
from .core.analyzer import CodeAnalyzer
|
|
9
|
+
from .core.embeddings import EmbeddingEngine
|
|
10
|
+
from .core.sync import RepoSync
|
|
11
|
+
from .llm.manager import LLMManager
|
|
12
|
+
|
|
13
|
+
class Contextly:
    """Main class coordinating Contextly's functionality.

    Wires together the LLM manager, the code analyzer, the embedding engine
    and the repository scanner for one repository root.
    """

    def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
        """Create the collaborating components.

        Args:
            repo_path: Repository root; falls back to the current working
                directory when omitted.
            model: Model name forwarded to ``LLMManager``.
        """
        self.repo_path = repo_path or Path.cwd()
        self.llm_manager = LLMManager(model)
        self.analyzer = CodeAnalyzer(self.repo_path)
        self.embedding_engine = EmbeddingEngine(self.repo_path)
        self.repo_sync = RepoSync(self.repo_path)

    def _ensure_initialized(self) -> None:
        """Initialize the embedding engine the first time it is needed."""
        if not getattr(self, '_initialized', False):
            self.embedding_engine.initialize()
            self._initialized = True

    def ask(self, question: str) -> Dict[str, Any]:
        """Answer questions about the codebase.

        Args:
            question: Natural-language question.

        Returns:
            A dict with ``'answer'`` (the LLM output) and ``'context'`` (the
            raw search result the prompt was built from).
        """
        self._ensure_initialized()

        # Search for relevant code
        search_results = self.embedding_engine.search(question)

        # Build context from search results
        context = ''.join(
            f"From {result['file']}:\n{result['content']}\n"
            for result in search_results['results']
        )

        # Generate the answer with the manager configured in __init__ so the
        # ``model`` argument is honoured (previously the analyzer's
        # default-configured manager was used instead).
        prompt = f"Question: {question}\n\nContext from codebase:\n{context}\n\nAnswer:"
        answer = self.llm_manager.generate(prompt)

        return {
            'answer': answer,
            'context': search_results
        }

    def explain(self, location: str) -> str:
        """Explain code at specific location.

        Args:
            location: ``FILE:LINE`` string, e.g. ``src/auth.py:52``.

        Returns:
            Whatever ``CodeAnalyzer.explain_code`` returns for that location.

        Raises:
            ValueError: If *location* has no ``:LINE`` part or the line is not
                an integer.
        """
        # Split on the LAST colon so paths that themselves contain colons
        # (e.g. Windows drive letters) keep working.
        file_path, separator, line_text = location.rpartition(':')
        if not separator or not file_path:
            raise ValueError(
                f"Invalid location {location!r}; expected 'path/to/file:LINE'"
            )
        try:
            line_number = int(line_text)
        except ValueError:
            raise ValueError(
                f"Invalid location {location!r}; expected 'path/to/file:LINE'"
            ) from None
        return self.analyzer.explain_code(file_path, line_number)

    def search(self, term: str) -> Dict[str, Any]:
        """Semantic search across codebase.

        Returns:
            A dict with the ``'query'`` and ``'results'`` entries of the
            embedding engine's search result.
        """
        self._ensure_initialized()

        results = self.embedding_engine.search(term)
        return {
            'query': results['query'],
            'results': results['results']
        }

    def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
        """Compare configuration files by delegating to the analyzer."""
        return self.analyzer.compare_configs(path1, path2)

    def sync(self) -> None:
        """Build or rebuild the embedding index."""
        # index_repository() performs its own scan; the separate scan_files()
        # pass that used to run first was discarded and only cost a second
        # walk of the tree.
        index = self.repo_sync.index_repository()

        # Initialize embedding engine
        self.embedding_engine.initialize()
        self._initialized = True

        # Flatten the per-file index into one list of chunk documents
        docs = []
        for file_data in index.values():
            docs.extend(file_data.get('chunks', []))

        # Embed documents
        self.embedding_engine.embed_documents(docs)
|
contextly/cli.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main CLI interface for Contextly
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich import print
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.markdown import Markdown
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.syntax import Syntax
|
|
11
|
+
from typing import Optional
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from .app import Contextly
|
|
15
|
+
|
|
16
|
+
console = Console()
|
|
17
|
+
|
|
18
|
+
app = typer.Typer(
|
|
19
|
+
name="contextly",
|
|
20
|
+
help="AI Context Engine for Developers",
|
|
21
|
+
add_completion=False,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
model_app = typer.Typer(help="Manage LLM models")
|
|
25
|
+
app.add_typer(model_app, name="model")
|
|
26
|
+
|
|
27
|
+
def get_contextly(repo_path: Optional[Path] = None) -> Contextly:
    """Build a ``Contextly`` instance for *repo_path*.

    A new instance is constructed on every call; ``None`` is passed through
    unchanged so ``Contextly`` applies its own default.
    """
    instance = Contextly(repo_path)
    return instance
|
|
30
|
+
|
|
31
|
+
@app.command()
def ask(
    question: str = typer.Argument(..., help="Your question about the codebase"),
) -> None:
    """Ask natural language questions about your codebase."""
    # NOTE: the docstring above doubles as the CLI help text; keep it short.
    print("🔍 Searching for answer...")
    result = get_contextly().ask(question)

    # Print answer in a panel
    console.print(Panel(
        Markdown(result['answer']),
        title="💡 Answer",
        border_style="blue"
    ))

    # Print relevant code snippets, if the search produced any
    snippets = result.get('context', {}).get('results')
    if not snippets:
        return

    console.print("\n📚 Relevant code:")
    for snippet in snippets:
        # Pick the lexer from the file name/content instead of always
        # highlighting as Python; guess_lexer returns "default" when unsure.
        lexer = Syntax.guess_lexer(str(snippet['file']), code=snippet['content'])
        console.print(Panel(
            Syntax(
                snippet['content'],
                lexer,
                theme="monokai",
                line_numbers=True
            ),
            title=f"📄 {snippet['file']}",
            border_style="dim"
        ))
|
|
60
|
+
|
|
61
|
+
@app.command()
def explain(
    location: str = typer.Argument(..., help="File location (e.g., src/auth.py:52)"),
) -> None:
    """Get human-readable explanation of code at specific location."""
    # Validate the FILE:LINE shape up front so a typo produces a clear message
    # and a non-zero exit code instead of a ValueError traceback.
    file_part, separator, line_part = location.rpartition(':')
    if not separator or not file_part or not line_part.strip().isdigit():
        # markup=False: the user-supplied text must not be parsed as Rich markup.
        console.print(
            f"❌ Invalid location {location!r}; expected FILE:LINE (e.g., src/auth.py:52)",
            markup=False,
        )
        raise typer.Exit(code=1)

    print("📝 Analyzing code...")
    explanation = get_contextly().explain(location)
    print(explanation)
|
|
69
|
+
|
|
70
|
+
@app.command()
def search(
    term: str = typer.Argument(..., help="Search term"),
) -> None:
    """Semantic search across your codebase."""
    print("🔎 Searching codebase...")
    results = get_contextly().search(term)

    matches = results['results']
    if not matches:
        print("❌ No matching code found")
        return

    print(f"\n✨ Found {len(matches)} matches:\n")
    for result in matches:
        # The engine may report no score (None); formatting None with ':.2f'
        # raises TypeError, so the score suffix is only added when present.
        score = result.get('score')
        score_label = f" (Score: {score:.2f})" if score is not None else ""

        # Pick the lexer from the file name/content instead of always
        # highlighting as Python; guess_lexer returns "default" when unsure.
        lexer = Syntax.guess_lexer(str(result['file']), code=result['content'])

        console.print(Panel(
            Syntax(
                result['content'],
                lexer,
                theme="monokai",
                line_numbers=True
            ),
            title=f"📄 {result['file']}{score_label}",
            border_style="blue"
        ))
|
|
94
|
+
|
|
95
|
+
@app.command()
def diff(
    path1: Path = typer.Argument(..., help="First file path"),
    path2: Path = typer.Argument(..., help="Second file path"),
) -> None:
    """Compare and explain differences between configuration files."""
    print("⚡ Analyzing differences...")
    differences = get_contextly().diff(path1, path2)

    key_differences = differences['differences']
    unified_diff = differences['unified_diff']

    # Only claim the files are identical when neither the parsed values nor
    # the raw text differ; previously files that differed only in text that
    # does not affect parsed values were reported as identical.
    if not key_differences and not unified_diff:
        print("✅ Files are identical")
        return

    # Print unified diff
    if unified_diff:
        console.print("\n📊 Diff:")
        console.print(Panel(
            Syntax(
                "\n".join(unified_diff),
                "diff",
                theme="monokai"
            ),
            border_style="yellow"
        ))

    # Print value differences
    if not key_differences:
        console.print("\n🔍 No differences in parsed values (only the raw text differs)")
        return

    console.print("\n🔍 Key Differences:")
    # 'entry' rather than 'diff' so the loop does not shadow this function.
    for entry in key_differences:
        console.print(
            f"• {entry['key']}:\n"
            f" - {path1}: {entry['file1_value']!r}\n"
            f" + {path2}: {entry['file2_value']!r}\n"
        )
|
|
128
|
+
|
|
129
|
+
@app.command()
def sync(
    path: Optional[Path] = typer.Argument(
        None, help="Path to repository (defaults to current directory)"
    ),
) -> None:
    """Build or rebuild the local embedding index."""
    # Announce, run the indexing for the chosen repository, then confirm.
    print("🔄 Syncing repository...")
    contextly = get_contextly(path)
    contextly.sync()
    print("✨ Repository indexed successfully!")
|
|
139
|
+
|
|
140
|
+
@model_app.command("list")
def list_models() -> None:
    """List available models."""
    available = get_contextly().llm_manager.list_models()

    console.print("\n🤖 Available Models:")
    for entry in available:
        # 'available' models are shown in green, every other status in yellow.
        status = entry['status']
        color = "yellow" if status != 'available' else "green"
        console.print(
            f"\n[bold]{entry['name']}[/bold] "
            f"([{color}]{status}[/{color}])")

        # The description line is optional; the provider line is always shown.
        description = entry.get('description')
        if description:
            console.print(f" {description}")
        console.print(f" Provider: {entry['provider']}")
|
|
155
|
+
|
|
156
|
+
@model_app.command("use")
def use_model(
    model_name: str = typer.Argument(..., help="Name of the model to use")
) -> None:
    """Set the model to use for code analysis."""
    ctx = get_contextly()
    print(f"🔍 Setting up model {model_name}...")

    # set_model's truthiness decides which outcome message is shown.
    succeeded = ctx.llm_manager.set_model(model_name)
    if not succeeded:
        print(f"❌ Failed to set up {model_name}")
        return
    print(f"✅ Now using {model_name}")
|
|
168
|
+
|
|
169
|
+
@model_app.command("download")
def download_model(
    model_name: str = typer.Argument(..., help="Name of the model to download"),
    provider: str = typer.Option(
        "ollama",
        help="Model provider (ollama, huggingface, custom)"
    ),
    url: Optional[str] = typer.Option(
        None,
        help="URL for custom model download"
    )
) -> None:
    """Download a new model."""
    from contextly.llm.models import ModelProvider

    ctx = get_contextly()
    print(f"💾 Downloading {model_name}...")

    # Keep the try body to the enum lookup only: a ValueError raised inside
    # the download itself must not be misreported as an unknown provider.
    try:
        provider_enum = ModelProvider(provider)
    except ValueError:
        print(f"❌ Unknown provider: {provider}")
        return

    downloaded = ctx.llm_manager.model_manager.download_model(
        model_name,
        provider_enum,
        url=url
    )
    if downloaded:
        print(f"✅ Successfully downloaded {model_name}")
    else:
        print(f"❌ Failed to download {model_name}")
|
|
199
|
+
|
|
200
|
+
if __name__ == "__main__":
|
|
201
|
+
app()
|
|
contextly/core/analyzer.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core functionality for code parsing and analysis.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import difflib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
from ..llm import LLMManager
|
|
9
|
+
from ..parsers.python import PythonParser
|
|
10
|
+
from ..parsers.javascript import JavaScriptParser
|
|
11
|
+
from ..parsers.config import ConfigParser
|
|
12
|
+
|
|
13
|
+
class Parser:
    """Interface for file-type specific parsers.

    Holds the repository root; subclasses are expected to override
    :meth:`parse`.
    """

    def __init__(self, repo_path: Path):
        """Remember the repository root this parser works under."""
        self.repo_path = repo_path

    def parse(self, file_path: Path) -> Dict[str, Any]:
        """Return the content and metadata of *file_path*.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|
|
22
|
+
|
|
23
|
+
class CodeAnalyzer:
    """Analyzes code and configuration files."""

    def __init__(self, repo_path: Path):
        """Set up the LLM manager and the extension -> parser table.

        Args:
            repo_path: Repository root this analyzer is associated with.
        """
        self.repo_path = repo_path
        self.llm = LLMManager()

        # Initialize parsers (one instance per extension)
        self.parsers = {
            '.py': PythonParser(),
            '.js': JavaScriptParser(),
            '.jsx': JavaScriptParser(),
            '.ts': JavaScriptParser(),
            '.tsx': JavaScriptParser(),
            '.json': ConfigParser(),
            '.yml': ConfigParser(),
            '.yaml': ConfigParser(),
            '.toml': ConfigParser(),
            '.env': ConfigParser(),
        }

    def _get_parser(self, file_path: str) -> Optional["Parser"]:
        """Get appropriate parser for a file type.

        Returns:
            The parser registered for the file's extension, or ``None``.
        """
        path = Path(file_path)
        # A dotfile such as ".env" has an empty ``suffix``; fall back to the
        # whole file name so the '.env' entry is actually reachable.
        ext = path.suffix or path.name
        return self.parsers.get(ext)

    def explain_code(self, file_path: str, line_number: int) -> str:
        """Generate explanation for code at specific location.

        Args:
            file_path: Path of the file to inspect.
            line_number: Line whose enclosing chunk should be explained.

        Returns:
            The LLM's explanation, or a plain message when the file type is
            unsupported or no parsed chunk covers *line_number*.
        """
        parser = self._get_parser(file_path)
        if not parser:
            return f"Unsupported file type: {file_path}"

        # Parse file and extract context around line
        result = parser.parse(Path(file_path))

        # First chunk whose line range contains the requested line
        target_chunk = next(
            (
                chunk for chunk in result['chunks']
                if chunk['start_line'] <= line_number <= chunk['end_line']
            ),
            None,
        )
        if not target_chunk:
            return f"No code found at line {line_number}"

        # Get explanation from LLM
        prompt = f"Explain this {result['file_type']} code snippet:\n\n{target_chunk['content']}"
        return self.llm.generate(prompt)

    def compare_configs(self, path1: Path, path2: Path) -> Dict[str, Any]:
        """Compare two configuration files and detect differences.

        Returns:
            A dict with ``'file1_type'``, ``'file2_type'``, ``'differences'``
            (one entry per key whose parsed value differs) and
            ``'unified_diff'`` (list of unified-diff lines of the raw text).
        """
        parser = ConfigParser()

        # Parse both files
        result1 = parser.parse(path1)
        result2 = parser.parse(path2)
        parsed1 = result1['parsed']
        parsed2 = result2['parsed']

        # Compare parsed values key by key. Sorting by str() keeps the order
        # deterministic even when keys are not mutually comparable.
        all_keys = set(parsed1.keys()) | set(parsed2.keys())
        diffs = []
        for key in sorted(all_keys, key=str):
            val1 = parsed1.get(key)
            val2 = parsed2.get(key)
            if val1 != val2:
                diffs.append({
                    'key': key,
                    'file1_value': val1,
                    'file2_value': val2
                })

        # Compare raw content for visual diff
        unified = list(difflib.unified_diff(
            result1['content'].splitlines(),
            result2['content'].splitlines(),
            fromfile=str(path1),
            tofile=str(path2),
            lineterm=''
        ))

        return {
            'file1_type': result1['file_type'],
            'file2_type': result2['file_type'],
            'differences': diffs,
            'unified_diff': unified
        }
|
|
contextly/core/embeddings.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vector embedding and similarity search functionality.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import os
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
import chromadb
|
|
9
|
+
from chromadb.config import Settings
|
|
10
|
+
from sentence_transformers import SentenceTransformer
|
|
11
|
+
|
|
12
|
+
class EmbeddingEngine:
    """Handles document embedding and similarity search."""

    # Name of the ChromaDB collection holding the indexed chunks.
    COLLECTION_NAME = "contextly_docs"

    def __init__(self, repo_path: Path):
        """Store the repository root; heavy resources are created lazily.

        Args:
            repo_path: Repository root; the vector store lives in
                ``<repo_path>/.contextly``.
        """
        self.repo_path = repo_path
        self.model = None  # Lazy load the model
        self.db = None  # ChromaDB client, created in initialize()
        self.collection = None

    def initialize(self):
        """Initialize the embedding model and vector store (idempotent)."""
        if self.model is None:
            # NOTE(review): nothing in this class passes the model to
            # add()/query(); the collection appears to embed with its own
            # default function. Confirm whether this load is still needed.
            self.model = SentenceTransformer('all-MiniLM-L6-v2')

        if self.db is None:
            # Create .contextly directory if it doesn't exist
            db_path = self.repo_path / '.contextly'
            os.makedirs(db_path, exist_ok=True)

            # Prefer the persistent client where the installed chromadb
            # provides it, so an index built by one command is still there
            # for the next; otherwise keep the original construction.
            if hasattr(chromadb, 'PersistentClient'):
                self.db = chromadb.PersistentClient(
                    path=str(db_path),
                    settings=Settings(anonymized_telemetry=False),
                )
            else:
                self.db = chromadb.Client(Settings(
                    persist_directory=str(db_path),
                    anonymized_telemetry=False
                ))

        if self.collection is None:
            # Get or create collection for this repo
            self.collection = self.db.get_or_create_collection(
                name=self.COLLECTION_NAME,
                metadata={"repo_path": str(self.repo_path)}
            )

    def embed_documents(self, documents: List[Dict[str, Any]]):
        """Convert documents into vector embeddings and store them.

        Args:
            documents: Chunk dicts; the keys ``type``, ``name``, ``content``,
                ``file_path``, ``start_line`` and ``end_line`` are read, each
                with a default when missing. An empty list is a no-op.
        """
        import zlib  # local import: only needed for the stable document IDs

        if not documents:
            return

        # Ensure initialization
        self.initialize()

        # Prepare documents for ChromaDB
        texts = []
        metadatas = []
        ids = []

        for idx, doc in enumerate(documents):
            # Create a meaningful document summary
            doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
            texts.append(doc_text)

            # Store metadata
            metadatas.append({
                "file_path": str(doc.get("file_path", "")),
                "type": doc.get("type", "unknown"),
                "name": doc.get("name", ""),
                "start_line": doc.get("start_line", 0),
                "end_line": doc.get("end_line", 0),
            })

            # Built-in hash() of a str is salted per process, which made the
            # IDs differ between runs. CRC32 is also a 32-bit value, so the
            # ID format is unchanged, but it is stable across runs.
            checksum = zlib.crc32(doc_text.encode('utf-8'))
            ids.append(f"doc_{idx}_{checksum}")

        # Add documents to the collection
        self.collection.add(
            documents=texts,
            metadatas=metadatas,
            ids=ids
        )

    def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
        """Search for most similar documents to query.

        Args:
            query: Free-text query.
            top_k: Maximum number of hits requested from the collection.

        Returns:
            ``{'query': query, 'results': [...]}`` where each hit has
            ``content``, ``metadata``, ``score`` (the distance reported by the
            collection, or ``None`` when not provided) and ``file``.
        """
        self.initialize()

        # Query the collection
        results = self.collection.query(
            query_texts=[query],
            n_results=top_k
        )

        # Results are nested per query; exactly one query was sent, hence [0].
        documents = results['documents'][0]
        metadatas = results['metadatas'][0]
        # 'distances' may be absent or None; previously a None value crashed
        # on indexing.
        distances = results.get('distances')
        scores = distances[0] if distances else [None] * len(documents)

        # Format results
        hits = [
            {
                'content': content,
                'metadata': metadata,
                'score': score,
                'file': metadata['file_path']
            }
            for content, metadata, score in zip(documents, metadatas, scores)
        ]

        return {
            'query': query,
            'results': hits
        }

    def clear(self):
        """Clear all embeddings from storage.

        Does nothing when no collection has been opened yet.
        """
        if self.collection is None:
            return
        # Calling collection.delete() without ids/where does not express
        # "delete everything"; drop the whole collection and recreate it
        # empty so the engine stays usable afterwards.
        self.db.delete_collection(self.COLLECTION_NAME)
        self.collection = self.db.get_or_create_collection(
            name=self.COLLECTION_NAME,
            metadata={"repo_path": str(self.repo_path)}
        )
|
contextly/core/sync.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Repository synchronization and indexing functionality.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List, Dict, Any, Iterator
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from ..parsers.python import PythonParser
|
|
10
|
+
from ..parsers.javascript import JavaScriptParser
|
|
11
|
+
from ..parsers.config import ConfigParser
|
|
12
|
+
|
|
13
|
+
class RepoSync:
    """Handles repository scanning and indexing."""

    # File types yielded by scan_files(). '.jsx', '.ts' and '.tsx' are listed
    # because index_repository() registers parsers for them; without these
    # entries those parsers could never be reached.
    SUPPORTED_EXTENSIONS = {
        '.py', '.js', '.jsx', '.ts', '.tsx',
        '.json', '.yml', '.yaml',
        '.env', '.toml', '.md', '.txt'
    }

    # Directory names that are never descended into: version-control data and
    # the '.contextly' directory the embedding engine creates under the repo.
    IGNORED_DIRS = {'.git', '.contextly'}

    def __init__(self, repo_path: Path):
        """Remember the repository root to scan."""
        self.repo_path = repo_path

    def scan_files(self) -> Iterator[Path]:
        """Scan repository for supported files.

        Yields:
            Paths of files below ``repo_path`` whose extension (or, for
            dotfiles such as ``.env``, whose whole name) is in
            ``SUPPORTED_EXTENSIONS``.
        """
        for root, dirs, files in os.walk(self.repo_path):
            # Prune in place so os.walk never enters ignored directories.
            # The old check looked for '.git' anywhere in the absolute path,
            # which silently skipped the whole repo if any ancestor directory
            # was named '.git'.
            dirs[:] = [name for name in dirs if name not in self.IGNORED_DIRS]

            root_path = Path(root)
            for file in files:
                file_path = root_path / file
                # Dotfiles like ".env" have an empty suffix; use the name.
                ext = file_path.suffix or file_path.name
                if ext in self.SUPPORTED_EXTENSIONS:
                    yield file_path

    def index_repository(self) -> Dict[str, Any]:
        """Build index of repository contents.

        Returns:
            Mapping of file path (as ``str``) to that file's parse result.
            Files without a registered parser, and files whose parsing
            raised, are left out.
        """
        index = {}
        parsers = {
            '.py': PythonParser(),
            '.js': JavaScriptParser(),
            '.jsx': JavaScriptParser(),
            '.ts': JavaScriptParser(),
            '.tsx': JavaScriptParser(),
            '.json': ConfigParser(),
            '.yml': ConfigParser(),
            '.yaml': ConfigParser(),
            '.toml': ConfigParser(),
            '.env': ConfigParser(),
        }

        for file_path in self.scan_files():
            # Same dotfile fallback as scan_files() so '.env' finds its parser.
            ext = file_path.suffix or file_path.name
            parser = parsers.get(ext)
            if not parser:
                continue

            # Best effort: one unparsable file must not abort the whole index.
            try:
                result = parser.parse(file_path)
                # Add file path to chunks for reference
                for chunk in result.get('chunks', []):
                    chunk['file_path'] = str(file_path)
                index[str(file_path)] = result
            except Exception as e:
                print(f"Error parsing {file_path}: {e}")

        return index
|
contextly/llm/base.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Abstract base class for LLM providers.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Dict, Any, Optional
|
|
7
|
+
|
|
8
|
+
class LLMProvider(ABC):
    """Abstract interface every LLM provider has to implement."""

    @abstractmethod
    def generate_response(self, prompt: str, **kwargs) -> str:
        """Return the provider's response to *prompt*.

        Extra keyword arguments are accepted for subclasses to interpret.
        """
        return None

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether this provider is available."""
        return None
|