contextly 0.1.0__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {contextly-0.1.0 → contextly-0.1.7}/PKG-INFO +7 -12
  2. {contextly-0.1.0 → contextly-0.1.7}/pyproject.toml +8 -15
  3. contextly-0.1.7/requirements.txt +19 -0
  4. contextly-0.1.7/src/contextly/app.py +111 -0
  5. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/cli.py +49 -13
  6. contextly-0.1.7/src/contextly/core/__init__.py +0 -0
  7. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/core/analyzer.py +13 -6
  8. contextly-0.1.7/src/contextly/core/embeddings.py +129 -0
  9. contextly-0.1.7/src/contextly/core/sync.py +89 -0
  10. contextly-0.1.7/src/contextly/llm/__init__.py +13 -0
  11. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/manager.py +13 -0
  12. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/models.py +48 -15
  13. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/ollama.py +5 -0
  14. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/openai.py +3 -1
  15. contextly-0.1.7/tests/conftest.py +33 -0
  16. {contextly-0.1.0 → contextly-0.1.7}/tests/test_core.py +0 -1
  17. contextly-0.1.7/tests/test_integration.py +170 -0
  18. contextly-0.1.0/src/contextly/app.py +0 -85
  19. contextly-0.1.0/src/contextly/core/embeddings.py +0 -109
  20. contextly-0.1.0/src/contextly/core/sync.py +0 -66
  21. {contextly-0.1.0 → contextly-0.1.7}/.gitignore +0 -0
  22. {contextly-0.1.0 → contextly-0.1.7}/LICENSE +0 -0
  23. {contextly-0.1.0 → contextly-0.1.7}/README.md +0 -0
  24. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/__init__.py +0 -0
  25. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/llm/base.py +0 -0
  26. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/base.py +0 -0
  27. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/config.py +0 -0
  28. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/javascript.py +0 -0
  29. {contextly-0.1.0 → contextly-0.1.7}/src/contextly/parsers/python.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: contextly
3
- Version: 0.1.0
3
+ Version: 0.1.7
4
4
  Summary: AI Context Engine for Developers
5
- Project-URL: Homepage, https://github.com/contextly/contextly
6
- Project-URL: Repository, https://github.com/contextly/contextly
7
- Project-URL: Documentation, https://github.com/contextly/contextly#readme
8
- Project-URL: Bug Tracker, https://github.com/contextly/contextly/issues
9
- Author-email: Contextly Team <team@contextly.dev>
5
+ Project-URL: Homepage, https://github.com/desenyon/contextly
6
+ Project-URL: Repository, https://github.com/desenyon/contextly
7
+ Project-URL: Documentation, https://github.com/desenyon/contextly#readme
8
+ Project-URL: Bug Tracker, https://github.com/desenyon/contextly/issues
9
+ Author-email: Contextly Team <desenyon@gmail.com>
10
10
  License-Expression: MIT
11
11
  License-File: LICENSE
12
12
  Keywords: ai,analysis,code,context,development,documentation,llm
@@ -41,16 +41,11 @@ Requires-Dist: toml>=0.10.2
41
41
  Requires-Dist: tqdm>=4.66.1
42
42
  Requires-Dist: transformers>=4.35.0
43
43
  Requires-Dist: typer>=0.9.0
44
- Provides-Extra: dev
45
- Requires-Dist: black>=23.0.0; extra == 'dev'
46
- Requires-Dist: isort>=5.12.0; extra == 'dev'
47
- Requires-Dist: mypy>=1.5.0; extra == 'dev'
48
- Requires-Dist: pre-commit>=3.3.0; extra == 'dev'
49
- Requires-Dist: ruff>=0.1.0; extra == 'dev'
50
44
  Provides-Extra: test
51
45
  Requires-Dist: black>=23.0.0; extra == 'test'
52
46
  Requires-Dist: isort>=5.12.0; extra == 'test'
53
47
  Requires-Dist: mypy>=1.5.0; extra == 'test'
48
+ Requires-Dist: pre-commit>=3.3.0; extra == 'test'
54
49
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
55
50
  Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
56
51
  Requires-Dist: pytest>=7.0.0; extra == 'test'
@@ -4,9 +4,9 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "contextly"
7
- version = "0.1.0"
7
+ version = "0.1.7"
8
8
  authors = [
9
- { name = "Contextly Team", email = "team@contextly.dev" },
9
+ { name = "Contextly Team", email = "desenyon@gmail.com" },
10
10
  ]
11
11
  description = "AI Context Engine for Developers"
12
12
  readme = "README.md"
@@ -58,29 +58,22 @@ test = [
58
58
  "isort>=5.12.0",
59
59
  "mypy>=1.5.0",
60
60
  "ruff>=0.1.0",
61
- ]
62
-
63
- dev = [
64
- "pre-commit>=3.3.0",
65
- "black>=23.0.0",
66
- "isort>=5.12.0",
67
- "mypy>=1.5.0",
68
- "ruff>=0.1.0",
61
+ "pre-commit>=3.3.0"
69
62
  ]
70
63
 
71
64
  [project.scripts]
72
65
  contextly = "contextly.cli:app"
73
66
 
74
67
  [project.urls]
75
- Homepage = "https://github.com/contextly/contextly"
76
- Repository = "https://github.com/contextly/contextly"
77
- Documentation = "https://github.com/contextly/contextly#readme"
78
- "Bug Tracker" = "https://github.com/contextly/contextly/issues"
68
+ Homepage = "https://github.com/desenyon/contextly"
69
+ Repository = "https://github.com/desenyon/contextly"
70
+ Documentation = "https://github.com/desenyon/contextly#readme"
71
+ "Bug Tracker" = "https://github.com/desenyon/contextly/issues"
79
72
 
80
73
  [tool.pytest.ini_options]
81
74
  testpaths = ["tests"]
82
75
  python_files = ["test_*.py"]
83
- addopts = "--cov=contextly --cov-report=term-missing"
76
+ addopts = "-v"
84
77
 
85
78
  [tool.black]
86
79
  line-length = 100
@@ -0,0 +1,19 @@
1
+ # Core dependencies
2
+ pyyaml>=6.0.1
3
+ python-dotenv>=1.0.0
4
+
5
+ # LLM and embeddings
6
+ openai>=1.0.0
7
+ langchain>=0.0.300
8
+ transformers>=4.34.0
9
+ sentence-transformers>=2.2.2
10
+
11
+ # Testing
12
+ pytest>=7.4.2
13
+ pytest-cov>=4.1.0
14
+
15
+ # Development
16
+ black>=23.9.1
17
+ isort>=5.12.0
18
+ flake8>=6.1.0
19
+ mypy>=1.5.1
@@ -0,0 +1,111 @@
1
+ """
2
+ Core business logic for Contextly commands.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import Dict, Any, Optional
7
+
8
+ from .core.analyzer import CodeAnalyzer
9
+ from .core.embeddings import EmbeddingEngine
10
+ from .core.sync import RepoSync
11
+ from .llm import LLMManager
12
+
13
+ class Contextly:
14
+ """Main class coordinating Contextly's functionality."""
15
+
16
+ def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
17
+ self.repo_path = repo_path or Path.cwd()
18
+ self.llm_manager = LLMManager(model)
19
+ self.analyzer = CodeAnalyzer(self.repo_path)
20
+ self.embedding_engine = EmbeddingEngine(self.repo_path)
21
+ self.repo_sync = RepoSync(self.repo_path)
22
+
23
+ def ask(self, question: str) -> Dict[str, Any]:
24
+ """Answer questions about the codebase."""
25
+ try:
26
+ # Search for relevant code
27
+ search_results = self.embedding_engine.search(question)
28
+
29
+ # Build context from search results
30
+ context = []
31
+ for result in search_results['results']:
32
+ context.append(f"From {result['file']}:\n{result['content']}\n")
33
+
34
+ if not context:
35
+ return {
36
+ 'answer': 'No relevant code found to answer the question.',
37
+ 'context': search_results
38
+ }
39
+
40
+ # Generate answer using LLM
41
+ context_str = '\n'.join(context)
42
+ context_dict = {
43
+ 'question': question,
44
+ 'code_snippets': context,
45
+ 'files': [r['file'] for r in search_results['results']]
46
+ }
47
+
48
+ answer = self.llm_manager.explain_code(context_str, context_dict)
49
+
50
+ return {
51
+ 'answer': answer,
52
+ 'context': search_results
53
+ }
54
+ except Exception as e:
55
+ return {
56
+ 'error': f'Failed to process question: {str(e)}',
57
+ 'context': None
58
+ }
59
+
60
+ def explain(self, location: str) -> str:
61
+ """Explain code at specific location."""
62
+ try:
63
+ if ':' not in location:
64
+ return "Invalid location format. Use 'file:line_number'"
65
+ file_path, line_number = location.split(':')
66
+ return self.analyzer.explain_code(file_path, int(line_number))
67
+ except ValueError:
68
+ return f"Invalid line number in location: {location}"
69
+ except Exception as e:
70
+ return f"Error explaining code: {str(e)}"
71
+
72
+ def search(self, term: str) -> Dict[str, Any]:
73
+ """Semantic search across codebase."""
74
+ try:
75
+ results = self.embedding_engine.search(term)
76
+ return {
77
+ 'query': results['query'],
78
+ 'results': results['results']
79
+ }
80
+ except Exception as e:
81
+ return {
82
+ 'query': term,
83
+ 'error': str(e),
84
+ 'results': []
85
+ }
86
+
87
+ def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
88
+ """Compare configuration files."""
89
+ return self.analyzer.compare_configs(path1, path2)
90
+
91
+ def sync(self) -> None:
92
+ """Build or rebuild the embedding index."""
93
+ # Scan repository
94
+ files = list(self.repo_sync.scan_files())
95
+
96
+ # Build index
97
+ index = self.repo_sync.index_repository()
98
+
99
+ # Convert index to list of documents
100
+ docs = []
101
+ for file_path, file_data in index.items():
102
+ docs.extend(file_data.get('chunks', []))
103
+
104
+ # Build or update the embedding index with the documents
105
+ try:
106
+ if docs:
107
+ self.embedding_engine.embed_documents(docs)
108
+ else:
109
+ print("No documents found to index")
110
+ except Exception as e:
111
+ raise RuntimeError(f"Failed to update search index: {str(e)}")
@@ -2,6 +2,7 @@
2
2
  Main CLI interface for Contextly
3
3
  """
4
4
 
5
+ import os
5
6
  import typer
6
7
  from rich import print
7
8
  from rich.console import Console
@@ -36,17 +37,28 @@ def ask(
36
37
  print("🔍 Searching for answer...")
37
38
  result = get_contextly().ask(question)
38
39
 
40
+ # Print answer or error in a panel
41
+ if 'error' in result:
42
+ console.print(Panel(
43
+ result['error'],
44
+ title="❌ Error",
45
+ border_style="red"
46
+ ))
47
+ return
48
+
39
49
  # Print answer in a panel
40
- console.print(Panel(
41
- Markdown(result['answer']),
42
- title="💡 Answer",
43
- border_style="blue"
44
- ))
45
-
46
- # Print relevant code snippets
47
- if result.get('context', {}).get('results'):
48
- console.print("\n📚 Relevant code:")
49
- for snippet in result['context']['results']:
50
+ if 'answer' in result:
51
+ console.print(Panel(
52
+ Markdown(result['answer']),
53
+ title="💡 Answer",
54
+ border_style="blue"
55
+ ))
56
+
57
+ # Print relevant code snippets
58
+ if result.get('context', {}).get('results'):
59
+ console.print("\n📚 Relevant code:")
60
+ for snippet in result['context']['results']:
61
+ console.print("") # Add a blank line for readability
50
62
  console.print(Panel(
51
63
  Syntax(
52
64
  snippet['content'],
@@ -133,9 +145,33 @@ def sync(
133
145
  ),
134
146
  ) -> None:
135
147
  """Build or rebuild the local embedding index."""
136
- print("🔄 Syncing repository...")
137
- get_contextly(path).sync()
138
- print(" Repository indexed successfully!")
148
+ # Create fancy header
149
+ console.print(Panel(
150
+ "[bold blue]Contextly Repository Sync[/]",
151
+ subtitle="Building semantic search index",
152
+ style="blue"
153
+ ))
154
+
155
+ contextly = get_contextly(path)
156
+
157
+ # Check model status first
158
+ model_name = os.getenv('CONTEXTLY_MODEL', 'codellama')
159
+ status = contextly.llm_manager.model_manager.registry.check_model_status(model_name)
160
+
161
+ if status['installed'] and status['ready']:
162
+ console.print(f"[green]✓[/] Using existing {model_name} model")
163
+ else:
164
+ console.print(f"[yellow]![/] {status['message']}")
165
+ console.print(f"[yellow]⚡[/] Downloading {model_name}...")
166
+
167
+ with console.status("[bold blue]📚 Analyzing repository structure...") as status:
168
+ try:
169
+ contextly.sync()
170
+ console.print("\n[green]✓[/] Repository indexed successfully!")
171
+ console.print("[dim]Run 'contextly ask' to start querying your codebase[/]")
172
+ except Exception as e:
173
+ console.print(f"\n[red]✗[/] Error: {str(e)}")
174
+ raise typer.Exit(1)
139
175
 
140
176
  @model_app.command("list")
141
177
  def list_models() -> None:
File without changes
@@ -5,10 +5,12 @@ Core functionality for code parsing and analysis.
5
5
  import difflib
6
6
  from pathlib import Path
7
7
  from typing import List, Dict, Any, Optional
8
- from ..llm import LLMManager
9
- from ..parsers.python import PythonParser
10
- from ..parsers.javascript import JavaScriptParser
11
- from ..parsers.config import ConfigParser
8
+ from typing import Type
9
+ from contextly.llm.manager import LLMManager
10
+ from contextly.parsers.base import BaseParser
11
+ from contextly.parsers.python import PythonParser
12
+ from contextly.parsers.javascript import JavaScriptParser
13
+ from contextly.parsers.config import ConfigParser
12
14
 
13
15
  class Parser:
14
16
  """Base class for parsing different file types."""
@@ -66,8 +68,13 @@ class CodeAnalyzer:
66
68
  return f"No code found at line {line_number}"
67
69
 
68
70
  # Get explanation from LLM
69
- prompt = f"Explain this {result['file_type']} code snippet:\n\n{target_chunk['content']}"
70
- explanation = self.llm.generate(prompt)
71
+ code = target_chunk['content']
72
+ context = {
73
+ 'file_type': result['file_type'],
74
+ 'file_path': file_path,
75
+ 'line_number': line_number
76
+ }
77
+ explanation = self.llm.explain_code(code, context)
71
78
 
72
79
  return explanation
73
80
 
@@ -0,0 +1,129 @@
1
+ """
2
+ Vector embedding and similarity search functionality.
3
+ """
4
+
5
+ from pathlib import Path
6
+ import os
7
+ from typing import List, Dict, Any, Optional
8
+ import chromadb
9
+ from chromadb.config import Settings
10
+ from sentence_transformers import SentenceTransformer
11
+
12
+ class EmbeddingEngine:
13
+ """Handles document embedding and similarity search."""
14
+
15
+ def __init__(self, repo_path: Path):
16
+ """Initialize the embedding engine with the given repository path."""
17
+ self.repo_path = repo_path
18
+ self.model: Optional[SentenceTransformer] = None
19
+ self.db = None
20
+ self.collection = None
21
+ self.data_dir = self.repo_path / '.contextly' / 'embeddings'
22
+ self.collection_name = "code_embeddings"
23
+ self.data_dir.mkdir(parents=True, exist_ok=True)
24
+
25
+ def _initialize(self) -> None:
26
+ """Initialize the model and database if not already initialized."""
27
+ if self.model is None:
28
+ try:
29
+ self.model = SentenceTransformer('all-MiniLM-L6-v2')
30
+ except Exception as e:
31
+ raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
32
+
33
+ if self.db is None:
34
+ try:
35
+ self.db = chromadb.PersistentClient(path=str(self.data_dir))
36
+ self.collection = self.db.get_or_create_collection(
37
+ name=self.collection_name,
38
+ metadata={"repo_path": str(self.repo_path)}
39
+ )
40
+ except Exception as e:
41
+ raise RuntimeError(f"Failed to initialize ChromaDB: {str(e)}")
42
+
43
+ def _ensure_initialized(self) -> None:
44
+ """Ensure both model and database are initialized."""
45
+ if self.model is None or self.db is None or self.collection is None:
46
+ self._initialize()
47
+ """Initialize the embedding model and vector store if not already initialized."""
48
+ self._ensure_initialized()
49
+
50
+ def embed_documents(self, documents: List[Dict[str, Any]]):
51
+ """Convert documents into vector embeddings and store them."""
52
+ if not documents:
53
+ return
54
+
55
+ # Ensure initialization
56
+ self._ensure_initialized()
57
+
58
+ # Prepare documents for ChromaDB
59
+ texts = []
60
+ metadatas = []
61
+ ids = []
62
+
63
+ for idx, doc in enumerate(documents):
64
+ # Create a meaningful document summary
65
+ doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
66
+ texts.append(doc_text)
67
+
68
+ # Store metadata
69
+ metadatas.append({
70
+ "file_path": str(doc.get("file_path", "")),
71
+ "type": doc.get("type", "unknown"),
72
+ "name": doc.get("name", ""),
73
+ "start_line": doc.get("start_line", 0),
74
+ "end_line": doc.get("end_line", 0),
75
+ })
76
+
77
+ # Generate unique ID
78
+ doc_id = f"doc_{idx}_{hash(doc_text) & 0xFFFFFFFF}"
79
+ ids.append(doc_id)
80
+
81
+ # Ensure collection is initialized
82
+ if self.collection is None:
83
+ raise RuntimeError("Collection is not initialized.")
84
+ # Add documents to the collection
85
+ self.collection.add(
86
+ documents=texts,
87
+ metadatas=metadatas,
88
+ ids=ids
89
+ )
90
+
91
+ def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
92
+ """Search for most similar documents to query."""
93
+ self._ensure_initialized()
94
+
95
+ # Ensure collection is initialized
96
+ if self.collection is None:
97
+ raise RuntimeError("Collection is not initialized.")
98
+
99
+ # Query the collection
100
+ results = self.collection.query(
101
+ query_texts=[query],
102
+ n_results=top_k
103
+ )
104
+
105
+ # Format results
106
+ hits = []
107
+ documents = results.get('documents')
108
+ metadatas = results.get('metadatas')
109
+ distances = results.get('distances')
110
+ ids = results.get('ids')
111
+
112
+ if documents is not None and metadatas is not None and ids is not None:
113
+ for i in range(len(ids[0])):
114
+ hits.append({
115
+ 'content': documents[0][i] if documents[0] is not None else None,
116
+ 'metadata': metadatas[0][i] if metadatas[0] is not None else None,
117
+ 'score': distances[0][i] if distances and distances[0] is not None else None,
118
+ 'file': metadatas[0][i]['file_path'] if metadatas[0] is not None else None
119
+ })
120
+
121
+ return {
122
+ 'query': query,
123
+ 'results': hits
124
+ }
125
+
126
+ def clear(self):
127
+ """Clear all embeddings from storage."""
128
+ if self.collection:
129
+ self.collection.delete()
@@ -0,0 +1,89 @@
1
+ """
2
+ Repository synchronization and indexing functionality.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import List, Dict, Any, Iterator, Tuple
7
+ import os
8
+ from concurrent.futures import ProcessPoolExecutor, as_completed
9
+ import multiprocessing
10
+ from tqdm import tqdm
11
+
12
+ from ..parsers.python import PythonParser
13
+ from ..parsers.javascript import JavaScriptParser
14
+ from ..parsers.config import ConfigParser
15
+
16
+ class RepoSync:
17
+ """Handles repository scanning and indexing."""
18
+
19
+ SUPPORTED_EXTENSIONS = {
20
+ # Code files
21
+ '.py': PythonParser,
22
+ '.js': JavaScriptParser,
23
+ '.jsx': JavaScriptParser,
24
+ '.ts': JavaScriptParser,
25
+ '.tsx': JavaScriptParser,
26
+ # Config files
27
+ '.json': ConfigParser,
28
+ '.yml': ConfigParser,
29
+ '.yaml': ConfigParser,
30
+ '.toml': ConfigParser,
31
+ '.env': ConfigParser,
32
+ }
33
+
34
+ IGNORED_DIRS = {'.git', '__pycache__', 'node_modules', 'venv', '.venv', 'dist', 'build'}
35
+
36
+ def __init__(self, repo_path: Path):
37
+ self.repo_path = repo_path
38
+ self.num_workers = max(1, multiprocessing.cpu_count() - 1)
39
+
40
+ def scan_files(self) -> List[Path]:
41
+ """Scan repository for supported files."""
42
+ files = []
43
+ for root, dirs, filenames in os.walk(self.repo_path):
44
+ # Skip ignored directories
45
+ dirs[:] = [d for d in dirs if d not in self.IGNORED_DIRS]
46
+
47
+ root_path = Path(root)
48
+ for file in filenames:
49
+ file_path = root_path / file
50
+ if file_path.suffix in self.SUPPORTED_EXTENSIONS:
51
+ files.append(file_path)
52
+ return files
53
+
54
+ def _process_file(self, file_path: Path) -> Tuple[str, Dict[str, Any]]:
55
+ """Process a single file and return its index data."""
56
+ parser_class = self.SUPPORTED_EXTENSIONS.get(file_path.suffix)
57
+ if not parser_class:
58
+ return str(file_path), {}
59
+
60
+ try:
61
+ parser = parser_class()
62
+ result = parser.parse(file_path)
63
+ # Add file path to chunks for reference
64
+ for chunk in result.get('chunks', []):
65
+ chunk['file_path'] = str(file_path)
66
+ return str(file_path), result
67
+ except Exception as e:
68
+ return str(file_path), {'error': str(e)}
69
+
70
+ def index_repository(self) -> Dict[str, Any]:
71
+ """Build index of repository contents using parallel processing."""
72
+ files = self.scan_files()
73
+ total_files = len(files)
74
+
75
+ if total_files == 0:
76
+ return {}
77
+
78
+ index = {}
79
+ with ProcessPoolExecutor(max_workers=self.num_workers) as executor:
80
+ futures = [executor.submit(self._process_file, f) for f in files]
81
+
82
+ with tqdm(total=total_files, desc="Analyzing files", unit="file") as pbar:
83
+ for future in as_completed(futures):
84
+ file_path, result = future.result()
85
+ if result and 'error' not in result:
86
+ index[file_path] = result
87
+ pbar.update(1)
88
+
89
+ return index
@@ -0,0 +1,13 @@
1
+ """LLM package for Contextly."""
2
+
3
+ from contextly.llm.manager import LLMManager
4
+ from contextly.llm.base import LLMProvider
5
+ from contextly.llm.models import ModelManager, ModelRegistry, ModelProvider
6
+
7
+ __all__ = [
8
+ 'LLMManager',
9
+ 'LLMProvider',
10
+ 'ModelManager',
11
+ 'ModelRegistry',
12
+ 'ModelProvider'
13
+ ]
@@ -14,10 +14,13 @@ class LLMManager:
14
14
  """Manages LLM providers and generates code explanations."""
15
15
 
16
16
  def __init__(self, model: Optional[str] = None):
17
+ """Initialize the LLM manager with the given model."""
17
18
  self.model_manager = ModelManager()
18
19
  self.providers: Dict[str, LLMProvider] = {}
19
20
  self.current_model = model or os.getenv('CONTEXTLY_MODEL', 'codellama')
21
+ self.initialized = False
20
22
  self._initialize_providers()
23
+ self.initialized = True
21
24
 
22
25
  def _initialize_providers(self) -> None:
23
26
  """Initialize LLM providers."""
@@ -27,6 +30,16 @@ class LLMManager:
27
30
  # Add OpenAI if key is available
28
31
  if os.getenv('OPENAI_API_KEY'):
29
32
  self.providers['openai'] = OpenAIProvider()
33
+
34
+ # Ensure the default provider is available
35
+ self.ensure_default_provider()
36
+
37
+ def ensure_default_provider(self) -> None:
38
+ """Ensure that a default provider is available."""
39
+ # Try to set up Ollama with codellama as default
40
+ if 'ollama' in self.providers:
41
+ if not self.model_manager.registry.get_model('codellama'):
42
+ self.model_manager.download_model('codellama', ModelProvider.OLLAMA)
30
43
 
31
44
  def get_available_provider(self) -> Optional[LLMProvider]:
32
45
  """Get the appropriate provider for the current model."""
@@ -2,7 +2,7 @@
2
2
  Model management system for various LLM providers.
3
3
  """
4
4
 
5
- from dataclasses import dataclass
5
+ from dataclasses import dataclass, field
6
6
  from enum import Enum
7
7
  from typing import Dict, Any, Optional, List
8
8
  import requests
@@ -25,7 +25,7 @@ class ModelInfo:
25
25
  description: str
26
26
  context_length: int
27
27
  quantization: Optional[str] = None
28
- metadata: Dict[str, Any] = None
28
+ metadata: Dict[str, Any] = field(default_factory=dict)
29
29
 
30
30
  class ModelRegistry:
31
31
  """Registry of available models and their capabilities."""
@@ -35,21 +35,54 @@ class ModelRegistry:
35
35
  self.models: Dict[str, ModelInfo] = {}
36
36
  self._load_models()
37
37
 
38
+ def check_model_status(self, model_name: str) -> Dict[str, Any]:
39
+ """Check if a model is installed and ready to use."""
40
+ import subprocess
41
+ status = {
42
+ 'installed': False,
43
+ 'ready': False,
44
+ 'message': ''
45
+ }
46
+
47
+ if model_name.startswith('codellama'):
48
+ # Check if Ollama is running
49
+ try:
50
+ result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
51
+ models = result.stdout.strip().split('\n')
52
+ status['installed'] = any(model_name in model for model in models)
53
+ if status['installed']:
54
+ status['ready'] = True
55
+ status['message'] = '✅ Model is installed and ready'
56
+ else:
57
+ status['message'] = '⚠️ Model not found in Ollama'
58
+ except Exception:
59
+ status['message'] = '❌ Ollama not running or not installed'
60
+ return status
61
+
38
62
  def _load_models(self) -> None:
39
63
  """Load model registry from config file."""
40
- if self.config_path.exists():
41
- with open(self.config_path, 'r') as f:
42
- data = json.load(f)
43
- for model_data in data.get('models', []):
44
- info = ModelInfo(
45
- name=model_data['name'],
46
- provider=ModelProvider(model_data['provider']),
47
- description=model_data.get('description', ''),
48
- context_length=model_data.get('context_length', 4096),
49
- quantization=model_data.get('quantization'),
50
- metadata=model_data.get('metadata', {})
51
- )
52
- self.models[model_data['name']] = info
64
+ try:
65
+ if self.config_path.exists():
66
+ with open(self.config_path, 'r') as f:
67
+ try:
68
+ data = json.load(f)
69
+ for model_data in data.get('models', []):
70
+ try:
71
+ info = ModelInfo(
72
+ name=model_data['name'],
73
+ provider=ModelProvider(model_data['provider']),
74
+ description=model_data.get('description', ''),
75
+ context_length=model_data.get('context_length', 4096),
76
+ quantization=model_data.get('quantization'),
77
+ metadata=model_data.get('metadata', {})
78
+ )
79
+ self.models[model_data['name']] = info
80
+ except (KeyError, ValueError) as e:
81
+ print(f"Warning: Skipping invalid model data: {e}")
82
+ except Exception as e:
83
+ print(f"Error loading models from config: {e}")
84
+ except Exception as e:
85
+ print(f"Error accessing config file: {e}")
53
86
 
54
87
  def _save_models(self) -> None:
55
88
  """Save model registry to config file."""
@@ -12,6 +12,7 @@ class OllamaProvider(LLMProvider):
12
12
 
13
13
  DEFAULT_MODEL = "codellama"
14
14
  BASE_URL = "http://localhost:11434/api"
15
+ _model_checked = {} # Class variable to track which models have been checked
15
16
 
16
17
  def __init__(self, model: str = DEFAULT_MODEL):
17
18
  self.model = model
@@ -19,6 +20,9 @@ class OllamaProvider(LLMProvider):
19
20
 
20
21
  def _ensure_model(self) -> None:
21
22
  """Ensure the model is downloaded and ready."""
23
+ if self.model in self._model_checked:
24
+ return
25
+
22
26
  try:
23
27
  # Check if model exists
24
28
  response = requests.get(f"{self.BASE_URL}/tags")
@@ -34,6 +38,7 @@ class OllamaProvider(LLMProvider):
34
38
  if pull_response.status_code != 200:
35
39
  raise RuntimeError(f"Failed to pull model: {pull_response.text}")
36
40
  print(f"{self.model} ready!")
41
+ self._model_checked[self.model] = True
37
42
  except Exception as e:
38
43
  raise RuntimeError(f"Failed to set up Ollama: {str(e)}")
39
44
 
@@ -10,7 +10,7 @@ from .base import LLMProvider
10
10
  class OpenAIProvider(LLMProvider):
11
11
  """LLM provider using OpenAI API."""
12
12
 
13
- DEFAULT_MODEL = "gpt-4-mini"
13
+ DEFAULT_MODEL = "gpt-3.5-turbo"
14
14
 
15
15
  def __init__(self, model: str = DEFAULT_MODEL, api_key: Optional[str] = None):
16
16
  self.model = model
@@ -27,6 +27,8 @@ class OpenAIProvider(LLMProvider):
27
27
  raise RuntimeError("OpenAI API is not configured")
28
28
 
29
29
  try:
30
+ if self.client is None:
31
+ raise RuntimeError("OpenAI client is not initialized. Please provide a valid API key.")
30
32
  response = self.client.chat.completions.create(
31
33
  model=self.model,
32
34
  messages=[{"role": "user", "content": prompt}],
@@ -0,0 +1,33 @@
1
+ """
2
+ Configuration and fixtures for pytest.
3
+ """
4
+
5
+ import os
6
+ import pytest
7
+ from pathlib import Path
8
+
9
+ @pytest.fixture
10
+ def test_repo_path():
11
+ """Get the path to the test repository."""
12
+ # Return the path to the contextly repository itself
13
+ return Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
+
15
+ @pytest.fixture(autouse=True)
16
+ def setup_environment():
17
+ """Set up test environment variables."""
18
+ # Store existing environment variables
19
+ old_env = {}
20
+ if 'CONTEXTLY_MODEL' in os.environ:
21
+ old_env['CONTEXTLY_MODEL'] = os.environ['CONTEXTLY_MODEL']
22
+
23
+ # Set test environment variables
24
+ os.environ['CONTEXTLY_MODEL'] = 'codellama'
25
+
26
+ yield
27
+
28
+ # Restore environment variables
29
+ for key in ['CONTEXTLY_MODEL']:
30
+ if key in old_env:
31
+ os.environ[key] = old_env[key]
32
+ elif key in os.environ:
33
+ del os.environ[key]
@@ -117,7 +117,6 @@ def test_embedding_engine(sample_repo_path):
117
117
  engine = EmbeddingEngine(sample_repo_path)
118
118
 
119
119
  # Test initialization
120
- engine.initialize()
121
120
  assert engine.model is not None
122
121
  assert engine.db is not None
123
122
 
@@ -0,0 +1,170 @@
1
+ """
2
+ Integration tests for Contextly using its own codebase as test data.
3
+ """
4
+
5
+ import os
6
+ import pytest
7
+ from pathlib import Path
8
+ from contextly.app import Contextly
9
+ from contextly.core.sync import RepoSync
10
+ from contextly.core.embeddings import EmbeddingEngine
11
+ from contextly.llm.manager import LLMManager
12
+ from contextly.llm.ollama import OllamaProvider
13
+
14
+ @pytest.fixture
15
+ def repo_path():
16
+ """Get the path to the Contextly repository."""
17
+ # Get the tests directory and go up one level
18
+ return Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19
+
20
+ @pytest.fixture
21
+ def contextly_app(repo_path):
22
+ """Create a Contextly instance for testing."""
23
+ return Contextly(repo_path)
24
+
25
+ def test_repo_sync(repo_path):
26
+ """Test repository synchronization with Contextly's own codebase."""
27
+ repo_sync = RepoSync(repo_path)
28
+ results = repo_sync.scan_files()
29
+
30
+ # Verify essential files are found
31
+ python_files = [f for f in results if str(f).endswith('.py')]
32
+ assert len(python_files) > 0, "Should find Python files"
33
+
34
+ # Verify core modules are found
35
+ core_files = {
36
+ 'app.py',
37
+ 'cli.py',
38
+ 'embeddings.py',
39
+ 'analyzer.py',
40
+ 'sync.py'
41
+ }
42
+ found_files = {f.name for f in python_files}
43
+ assert core_files.intersection(found_files), "Should find core modules"
44
+
45
+ def test_embedding_engine(repo_path):
46
+ """Test embedding engine with Contextly's codebase."""
47
+ engine = EmbeddingEngine(repo_path)
48
+
49
+ # Test search functionality
50
+ results = engine.search("What is the main purpose of this codebase?")
51
+ assert results is not None
52
+ assert 'results' in results
53
+ assert len(results['results']) > 0
54
+
55
+ # Verify search results contain relevant files
56
+ files = {r['file'] for r in results['results']}
57
+ assert any('README.md' in str(f) or 'app.py' in str(f) for f in files), \
58
+ "Should find relevant documentation or core files"
59
+
60
+ def test_llm_manager():
61
+ """Test LLM manager functionality."""
62
+ manager = LLMManager(model='codellama')
63
+ assert manager.initialized
64
+ assert 'ollama' in manager.providers
65
+
66
+ # Test available provider
67
+ provider = manager.get_available_provider()
68
+ assert provider is not None
69
+ assert isinstance(provider, OllamaProvider)
70
+
71
+ # Test model is available
72
+ assert provider.is_available()
73
+ assert provider.model == 'codellama'
74
+
75
+ # Test code explanation with simple code
76
+ code = '''def greet(name: str) -> str:
77
+ """Greet a user by name."""
78
+ return f"Hello, {name}!"'''
79
+
80
+ context = {
81
+ 'question': 'What does this function do?',
82
+ 'code_snippets': [code],
83
+ 'files': ['test.py']
84
+ }
85
+
86
+ explanation = manager.explain_code(code, context)
87
+ assert explanation is not None
88
+ assert len(explanation) > 0
89
+
90
+ def test_ollama_provider():
91
+ """Test Ollama LLM provider."""
92
+ provider = OllamaProvider()
93
+
94
+ # Test model availability
95
+ assert provider.is_available(), "Ollama should be running"
96
+
97
+ # Test response generation
98
+ prompt = "What is a code analyzer?"
99
+ response = provider.generate_response(prompt)
100
+ assert response is not None
101
+ assert len(response) > 0
102
+
103
+ def test_contextly_ask(contextly_app):
104
+ """Test the ask functionality with real questions about the codebase."""
105
+ questions = [
106
+ "What is the purpose of the sync.py file?",
107
+ "How does the embedding engine work?",
108
+ "What LLM models are supported?",
109
+ "How are code snippets parsed?"
110
+ ]
111
+
112
+ for question in questions:
113
+ result = contextly_app.ask(question)
114
+ assert result is not None
115
+ assert 'answer' in result or 'error' in result
116
+
117
+ if 'answer' in result:
118
+ assert len(result['answer']) > 0
119
+ # Verify context is provided when available
120
+ if result.get('context'):
121
+ assert 'results' in result['context']
122
+ assert len(result['context']['results']) > 0
123
+
124
+ def test_error_handling(contextly_app):
125
+ """Test error handling in various scenarios."""
126
+ # Test with empty question
127
+ with pytest.raises(Exception):
128
+ contextly_app.ask("")
129
+
130
+ # Test with very long question
131
+ long_question = "what " * 1000
132
+ result = contextly_app.ask(long_question)
133
+ assert 'error' in result or 'answer' in result
134
+
135
+ # Test with invalid repository path
136
+ invalid_app = Contextly(Path("/nonexistent/path"))
137
+ result = invalid_app.ask("What is this?")
138
+ assert 'error' in result
139
+
140
+ def test_code_analysis(contextly_app):
141
+ """Test code analysis functionality."""
142
+ # Get insights about a specific file
143
+ core_files = [
144
+ 'app.py',
145
+ 'cli.py',
146
+ 'embeddings.py'
147
+ ]
148
+
149
+ for file in core_files:
150
+ question = f"What does {file} do?"
151
+ result = contextly_app.ask(question)
152
+ assert result is not None
153
+ assert 'answer' in result
154
+ assert len(result['answer']) > 0
155
+
156
+ def test_multi_file_context(contextly_app):
157
+ """Test handling questions that require context from multiple files."""
158
+ questions = [
159
+ "How does the CLI interface interact with the core application?",
160
+ "What is the relationship between embeddings.py and analyzer.py?",
161
+ "How do the different LLM providers work together?"
162
+ ]
163
+
164
+ for question in questions:
165
+ result = contextly_app.ask(question)
166
+ assert result is not None
167
+ assert 'answer' in result
168
+ if result.get('context'):
169
+ files = {r['file'] for r in result['context']['results']}
170
+ assert len(files) > 1, "Should reference multiple files for complex questions"
@@ -1,85 +0,0 @@
1
- """
2
- Core business logic for Contextly commands.
3
- """
4
-
5
- from pathlib import Path
6
- from typing import Dict, Any, Optional
7
-
8
- from .core.analyzer import CodeAnalyzer
9
- from .core.embeddings import EmbeddingEngine
10
- from .core.sync import RepoSync
11
- from .llm.manager import LLMManager
12
-
13
class Contextly:
    """Main class coordinating Contextly's functionality.

    Holds one instance each of the LLM manager, code analyzer, embedding
    engine and repository scanner, all bound to the same repository path.
    """

    def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
        """Build the collaborators for ``repo_path``.

        Args:
            repo_path: Repository root; defaults to the current working
                directory when omitted.
            model: Model identifier forwarded to ``LLMManager``.
        """
        self.repo_path = repo_path or Path.cwd()
        self.llm_manager = LLMManager(model)
        self.analyzer = CodeAnalyzer(self.repo_path)
        self.embedding_engine = EmbeddingEngine(self.repo_path)
        self.repo_sync = RepoSync(self.repo_path)
        # Flipped by _ensure_initialized() once the embedding engine is ready.
        self._initialized = False

    def _ensure_initialized(self) -> None:
        """Initialize the embedding engine the first time it is needed."""
        # getattr keeps this safe for instances created without __init__.
        if not getattr(self, '_initialized', False):
            self.embedding_engine.initialize()
            self._initialized = True

    def ask(self, question: str) -> Dict[str, Any]:
        """Answer a question about the codebase.

        Args:
            question: Free-text question, also used as the search query.

        Returns:
            A dict with ``'answer'`` (the LLM output) and ``'context'`` (the
            raw search result the prompt was built from).
        """
        self._ensure_initialized()

        # Search for relevant code and fold every hit into the prompt context.
        search_results = self.embedding_engine.search(question)
        context = ''.join(
            f"From {hit['file']}:\n{hit['content']}\n"
            for hit in search_results['results']
        )

        prompt = f"Question: {question}\n\nContext from codebase:\n{context}\n\nAnswer:"
        # NOTE(review): generation goes through the analyzer's ``llm``
        # attribute rather than ``self.llm_manager`` - confirm this is intended.
        answer = self.analyzer.llm.generate(prompt)

        return {
            'answer': answer,
            'context': search_results,
        }

    def explain(self, location: str) -> str:
        """Explain the code at ``location``.

        Args:
            location: ``"<file path>:<line number>"``. Only the last colon
                separates the line number, so the path itself may contain
                colons (for example a Windows drive letter).

        Returns:
            Whatever ``analyzer.explain_code`` returns for that position.

        Raises:
            ValueError: If there is no ``:`` separator, the path part is
                empty, or the line number is not an integer.
        """
        file_path, separator, line_number = location.rpartition(':')
        if not separator or not file_path:
            raise ValueError(
                f"Expected location in the form '<file>:<line>', got {location!r}"
            )
        return self.analyzer.explain_code(file_path, int(line_number))

    def search(self, term: str) -> Dict[str, Any]:
        """Run a semantic search across the codebase.

        Returns:
            A dict with the ``'query'`` and ``'results'`` entries of the
            embedding engine's search output.
        """
        self._ensure_initialized()

        results = self.embedding_engine.search(term)
        return {
            'query': results['query'],
            'results': results['results'],
        }

    def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
        """Compare two configuration files via the analyzer."""
        return self.analyzer.compare_configs(path1, path2)

    def sync(self) -> None:
        """Build or rebuild the embedding index.

        Indexes the repository, collects the ``'chunks'`` list of every
        indexed file and hands them to the embedding engine.
        """
        # index_repository() scans the files itself, so no separate scan pass
        # is needed here.
        index = self.repo_sync.index_repository()

        self._ensure_initialized()

        # Flatten the per-file chunk lists into one list of documents.
        docs = []
        for file_data in index.values():
            docs.extend(file_data.get('chunks', []))

        self.embedding_engine.embed_documents(docs)
@@ -1,109 +0,0 @@
1
- """
2
- Vector embedding and similarity search functionality.
3
- """
4
-
5
- from pathlib import Path
6
- import os
7
- from typing import List, Dict, Any, Optional
8
- import chromadb
9
- from chromadb.config import Settings
10
- from sentence_transformers import SentenceTransformer
11
-
12
class EmbeddingEngine:
    """Handles document embedding and similarity search.

    Documents are stored in a ChromaDB collection named ``contextly_docs``
    whose storage directory is ``<repo_path>/.contextly``.
    """

    def __init__(self, repo_path: Path):
        """Remember the repository path; heavy resources are created lazily."""
        self.repo_path = repo_path
        self.model = None  # SentenceTransformer, loaded by initialize()
        self.db = None  # ChromaDB client, created by initialize()
        self.collection = None  # collection handle, set by initialize()

    def initialize(self):
        """Create the embedding model and vector store if not yet created.

        Safe to call repeatedly: each resource is only built while its
        attribute is still ``None``.
        """
        if self.model is None:
            # NOTE(review): the model is loaded here but never referenced by
            # embed_documents/search, which hand raw text to the collection -
            # confirm whether it is meant to supply the embeddings.
            self.model = SentenceTransformer('all-MiniLM-L6-v2')

        if self.db is None:
            # Create the .contextly directory if it doesn't exist.
            db_path = self.repo_path / '.contextly'
            db_path.mkdir(parents=True, exist_ok=True)

            # NOTE(review): whether this client actually persists to
            # ``persist_directory`` depends on the installed chromadb
            # version - verify against the pinned dependency.
            self.db = chromadb.Client(Settings(
                persist_directory=str(db_path),
                anonymized_telemetry=False,
            ))

            # Get or create the collection for this repo.
            self.collection = self.db.get_or_create_collection(
                name="contextly_docs",
                metadata={"repo_path": str(self.repo_path)},
            )

    def embed_documents(self, documents: List[Dict[str, Any]]):
        """Store ``documents`` in the collection.

        Args:
            documents: Chunk dicts. The keys ``type``, ``name``, ``content``,
                ``file_path``, ``start_line`` and ``end_line`` are read, each
                with a default when missing.

        Returns:
            None. An empty ``documents`` list is a no-op.
        """
        # Function-scope import keeps this block self-contained.
        import zlib

        if not documents:
            return

        self.initialize()

        texts = []
        metadatas = []
        ids = []

        for idx, doc in enumerate(documents):
            # Text that gets embedded: a short header followed by the content.
            doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
            texts.append(doc_text)

            metadatas.append({
                "file_path": str(doc.get("file_path", "")),
                "type": doc.get("type", "unknown"),
                "name": doc.get("name", ""),
                "start_line": doc.get("start_line", 0),
                "end_line": doc.get("end_line", 0),
            })

            # The builtin hash() of a str is salted per process, so it would
            # give a different ID for the same text on every run. CRC32 is a
            # stable 32-bit checksum, keeping IDs reproducible across runs.
            checksum = zlib.crc32(doc_text.encode('utf-8')) & 0xFFFFFFFF
            ids.append(f"doc_{idx}_{checksum}")

        self.collection.add(
            documents=texts,
            metadatas=metadatas,
            ids=ids,
        )

    def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
        """Return the documents most similar to ``query``.

        Args:
            query: Text to search for.
            top_k: Maximum number of results requested from the collection.

        Returns:
            ``{'query': query, 'results': hits}`` where every hit holds
            ``content``, ``metadata``, ``score`` (the reported distance, or
            ``None`` when no distances were returned) and ``file``.
        """
        self.initialize()

        results = self.collection.query(
            query_texts=[query],
            n_results=top_k,
        )

        # A single query text was sent, so only the first result row matters.
        documents = results['documents'][0]
        metadatas = results['metadatas'][0]
        # The 'distances' entry may be absent or None; both mean "no scores".
        distances = results['distances'] if 'distances' in results else None
        scores = distances[0] if distances else [None] * len(results['ids'][0])

        hits = [
            {
                'content': content,
                'metadata': metadata,
                'score': score,
                'file': metadata['file_path'],
            }
            for content, metadata, score in zip(documents, metadatas, scores)
        ]

        return {
            'query': query,
            'results': hits,
        }

    def clear(self):
        """Clear all embeddings from storage (no-op before initialization)."""
        # NOTE(review): an argument-less delete() is assumed to remove every
        # entry - confirm against the installed chromadb version.
        if self.collection is not None:
            self.collection.delete()
@@ -1,66 +0,0 @@
1
- """
2
- Repository synchronization and indexing functionality.
3
- """
4
-
5
- from pathlib import Path
6
- from typing import List, Dict, Any, Iterator
7
- import os
8
-
9
- from ..parsers.python import PythonParser
10
- from ..parsers.javascript import JavaScriptParser
11
- from ..parsers.config import ConfigParser
12
-
13
class RepoSync:
    """Handles repository scanning and indexing."""

    # File types picked up by scan_files(). Includes every extension that
    # index_repository() has a parser for, so no parser is unreachable.
    SUPPORTED_EXTENSIONS = {
        '.py', '.js', '.jsx', '.ts', '.tsx',
        '.json', '.yml', '.yaml',
        '.env', '.toml', '.md', '.txt',
    }

    # Directory names that are never descended into while scanning.
    _IGNORED_DIRS = {'.git'}

    def __init__(self, repo_path: Path):
        """Bind the scanner to ``repo_path``."""
        self.repo_path = repo_path

    @staticmethod
    def _extension_of(file_path: Path) -> str:
        """Return the key used to classify ``file_path``.

        A dotfile such as ``.env`` has an empty ``suffix``, so the whole
        file name is used instead in that case.
        """
        return file_path.suffix or file_path.name

    def scan_files(self) -> Iterator[Path]:
        """Yield every supported file below the repository root.

        Directories listed in ``_IGNORED_DIRS`` are pruned from the walk.
        """
        for root, dirs, files in os.walk(self.repo_path):
            # In-place edit: os.walk (top-down) skips whatever is removed.
            dirs[:] = [name for name in dirs if name not in self._IGNORED_DIRS]

            root_path = Path(root)
            for file in files:
                file_path = root_path / file
                if self._extension_of(file_path) in self.SUPPORTED_EXTENSIONS:
                    yield file_path

    def index_repository(self) -> Dict[str, Any]:
        """Parse every scanned file that has a parser.

        Returns:
            A dict mapping ``str(file_path)`` to that file's parse result.
            Each entry of the result's ``'chunks'`` list gets a
            ``'file_path'`` key. Files without a parser are skipped, as are
            files whose parsing raised.
        """
        index = {}
        parsers = {
            '.py': PythonParser(),
            '.js': JavaScriptParser(),
            '.jsx': JavaScriptParser(),
            '.ts': JavaScriptParser(),
            '.tsx': JavaScriptParser(),
            '.json': ConfigParser(),
            '.yml': ConfigParser(),
            '.yaml': ConfigParser(),
            '.toml': ConfigParser(),
            '.env': ConfigParser(),
        }

        for file_path in self.scan_files():
            parser = parsers.get(self._extension_of(file_path))
            if parser is None:
                # Scanned but not parseable (e.g. .md, .txt).
                continue

            # Best effort: a file that fails to parse is reported and
            # skipped so the rest of the repository still gets indexed.
            try:
                result = parser.parse(file_path)
                # Add file path to chunks for reference.
                for chunk in result.get('chunks', []):
                    chunk['file_path'] = str(file_path)
                index[str(file_path)] = result
            except Exception as e:
                print(f"Error parsing {file_path}: {e}")

        return index
File without changes
File without changes
File without changes