contextly 0.1.0__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {contextly-0.1.0 → contextly-0.1.4}/PKG-INFO +1 -1
  2. {contextly-0.1.0 → contextly-0.1.4}/pyproject.toml +1 -1
  3. contextly-0.1.4/src/contextly/app.py +111 -0
  4. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/core/analyzer.py +9 -2
  5. contextly-0.1.4/src/contextly/core/embeddings.py +129 -0
  6. contextly-0.1.4/src/contextly/llm/__init__.py +13 -0
  7. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/llm/models.py +24 -15
  8. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/llm/openai.py +3 -1
  9. {contextly-0.1.0 → contextly-0.1.4}/tests/test_core.py +0 -1
  10. contextly-0.1.0/src/contextly/app.py +0 -85
  11. contextly-0.1.0/src/contextly/core/embeddings.py +0 -109
  12. {contextly-0.1.0 → contextly-0.1.4}/.gitignore +0 -0
  13. {contextly-0.1.0 → contextly-0.1.4}/LICENSE +0 -0
  14. {contextly-0.1.0 → contextly-0.1.4}/README.md +0 -0
  15. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/__init__.py +0 -0
  16. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/cli.py +0 -0
  17. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/core/sync.py +0 -0
  18. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/llm/base.py +0 -0
  19. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/llm/manager.py +0 -0
  20. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/llm/ollama.py +0 -0
  21. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/parsers/base.py +0 -0
  22. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/parsers/config.py +0 -0
  23. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/parsers/javascript.py +0 -0
  24. {contextly-0.1.0 → contextly-0.1.4}/src/contextly/parsers/python.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: contextly
3
- Version: 0.1.0
3
+ Version: 0.1.4
4
4
  Summary: AI Context Engine for Developers
5
5
  Project-URL: Homepage, https://github.com/contextly/contextly
6
6
  Project-URL: Repository, https://github.com/contextly/contextly
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "contextly"
7
- version = "0.1.0"
7
+ version = "0.1.4"
8
8
  authors = [
9
9
  { name = "Contextly Team", email = "team@contextly.dev" },
10
10
  ]
@@ -0,0 +1,111 @@
1
+ """
2
+ Core business logic for Contextly commands.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import Dict, Any, Optional
7
+
8
+ from .core.analyzer import CodeAnalyzer
9
+ from .core.embeddings import EmbeddingEngine
10
+ from .core.sync import RepoSync
11
+ from .llm import LLMManager
12
+
13
+ class Contextly:
14
+ """Main class coordinating Contextly's functionality."""
15
+
16
+ def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
17
+ self.repo_path = repo_path or Path.cwd()
18
+ self.llm_manager = LLMManager(model)
19
+ self.analyzer = CodeAnalyzer(self.repo_path)
20
+ self.embedding_engine = EmbeddingEngine(self.repo_path)
21
+ self.repo_sync = RepoSync(self.repo_path)
22
+
23
+ def ask(self, question: str) -> Dict[str, Any]:
24
+ """Answer questions about the codebase."""
25
+ try:
26
+ # Search for relevant code
27
+ search_results = self.embedding_engine.search(question)
28
+
29
+ # Build context from search results
30
+ context = []
31
+ for result in search_results['results']:
32
+ context.append(f"From {result['file']}:\n{result['content']}\n")
33
+
34
+ if not context:
35
+ return {
36
+ 'answer': 'No relevant code found to answer the question.',
37
+ 'context': search_results
38
+ }
39
+
40
+ # Generate answer using LLM
41
+ context_str = '\n'.join(context)
42
+ context_dict = {
43
+ 'question': question,
44
+ 'code_snippets': context,
45
+ 'files': [r['file'] for r in search_results['results']]
46
+ }
47
+
48
+ answer = self.llm_manager.explain_code(context_str, context_dict)
49
+
50
+ return {
51
+ 'answer': answer,
52
+ 'context': search_results
53
+ }
54
+ except Exception as e:
55
+ return {
56
+ 'error': f'Failed to process question: {str(e)}',
57
+ 'context': None
58
+ }
59
+
60
+ def explain(self, location: str) -> str:
61
+ """Explain code at specific location."""
62
+ try:
63
+ if ':' not in location:
64
+ return "Invalid location format. Use 'file:line_number'"
65
+ file_path, line_number = location.split(':')
66
+ return self.analyzer.explain_code(file_path, int(line_number))
67
+ except ValueError:
68
+ return f"Invalid line number in location: {location}"
69
+ except Exception as e:
70
+ return f"Error explaining code: {str(e)}"
71
+
72
+ def search(self, term: str) -> Dict[str, Any]:
73
+ """Semantic search across codebase."""
74
+ try:
75
+ results = self.embedding_engine.search(term)
76
+ return {
77
+ 'query': results['query'],
78
+ 'results': results['results']
79
+ }
80
+ except Exception as e:
81
+ return {
82
+ 'query': term,
83
+ 'error': str(e),
84
+ 'results': []
85
+ }
86
+
87
+ def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
88
+ """Compare configuration files."""
89
+ return self.analyzer.compare_configs(path1, path2)
90
+
91
+ def sync(self) -> None:
92
+ """Build or rebuild the embedding index."""
93
+ # Scan repository
94
+ files = list(self.repo_sync.scan_files())
95
+
96
+ # Build index
97
+ index = self.repo_sync.index_repository()
98
+
99
+ # Convert index to list of documents
100
+ docs = []
101
+ for file_path, file_data in index.items():
102
+ docs.extend(file_data.get('chunks', []))
103
+
104
+ # Build or update the embedding index with the documents
105
+ try:
106
+ if docs:
107
+ self.embedding_engine.embed_documents(docs)
108
+ else:
109
+ print("No documents found to index")
110
+ except Exception as e:
111
+ raise RuntimeError(f"Failed to update search index: {str(e)}")
@@ -5,7 +5,9 @@ Core functionality for code parsing and analysis.
5
5
  import difflib
6
6
  from pathlib import Path
7
7
  from typing import List, Dict, Any, Optional
8
+ from typing import Type
8
9
  from ..llm import LLMManager
10
+ from ..parsers.base import BaseParser
9
11
  from ..parsers.python import PythonParser
10
12
  from ..parsers.javascript import JavaScriptParser
11
13
  from ..parsers.config import ConfigParser
@@ -66,8 +68,13 @@ class CodeAnalyzer:
66
68
  return f"No code found at line {line_number}"
67
69
 
68
70
  # Get explanation from LLM
69
- prompt = f"Explain this {result['file_type']} code snippet:\n\n{target_chunk['content']}"
70
- explanation = self.llm.generate(prompt)
71
+ code = target_chunk['content']
72
+ context = {
73
+ 'file_type': result['file_type'],
74
+ 'file_path': file_path,
75
+ 'line_number': line_number
76
+ }
77
+ explanation = self.llm.explain_code(code, context)
71
78
 
72
79
  return explanation
73
80
 
@@ -0,0 +1,129 @@
1
+ """
2
+ Vector embedding and similarity search functionality.
3
+ """
4
+
5
+ from pathlib import Path
6
+ import os
7
+ from typing import List, Dict, Any, Optional
8
+ import chromadb
9
+ from chromadb.config import Settings
10
+ from sentence_transformers import SentenceTransformer
11
+
12
+ class EmbeddingEngine:
13
+ """Handles document embedding and similarity search."""
14
+
15
+ def __init__(self, repo_path: Path):
16
+ """Initialize the embedding engine with the given repository path."""
17
+ self.repo_path = repo_path
18
+ self.model: Optional[SentenceTransformer] = None
19
+ self.db = None
20
+ self.collection = None
21
+ self.data_dir = self.repo_path / '.contextly' / 'embeddings'
22
+ self.collection_name = "code_embeddings"
23
+ self.data_dir.mkdir(parents=True, exist_ok=True)
24
+
25
+ def _initialize(self) -> None:
26
+ """Initialize the model and database if not already initialized."""
27
+ if self.model is None:
28
+ try:
29
+ self.model = SentenceTransformer('all-MiniLM-L6-v2')
30
+ except Exception as e:
31
+ raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
32
+
33
+ if self.db is None:
34
+ try:
35
+ self.db = chromadb.PersistentClient(path=str(self.data_dir))
36
+ self.collection = self.db.get_or_create_collection(
37
+ name=self.collection_name,
38
+ metadata={"repo_path": str(self.repo_path)}
39
+ )
40
+ except Exception as e:
41
+ raise RuntimeError(f"Failed to initialize ChromaDB: {str(e)}")
42
+
43
+ def _ensure_initialized(self) -> None:
44
+ """Ensure both model and database are initialized."""
45
+ if self.model is None or self.db is None or self.collection is None:
46
+ self._initialize()
47
+ """Initialize the embedding model and vector store if not already initialized."""
48
+ self._ensure_initialized()
49
+
50
+ def embed_documents(self, documents: List[Dict[str, Any]]):
51
+ """Convert documents into vector embeddings and store them."""
52
+ if not documents:
53
+ return
54
+
55
+ # Ensure initialization
56
+ self._ensure_initialized()
57
+
58
+ # Prepare documents for ChromaDB
59
+ texts = []
60
+ metadatas = []
61
+ ids = []
62
+
63
+ for idx, doc in enumerate(documents):
64
+ # Create a meaningful document summary
65
+ doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
66
+ texts.append(doc_text)
67
+
68
+ # Store metadata
69
+ metadatas.append({
70
+ "file_path": str(doc.get("file_path", "")),
71
+ "type": doc.get("type", "unknown"),
72
+ "name": doc.get("name", ""),
73
+ "start_line": doc.get("start_line", 0),
74
+ "end_line": doc.get("end_line", 0),
75
+ })
76
+
77
+ # Generate unique ID
78
+ doc_id = f"doc_{idx}_{hash(doc_text) & 0xFFFFFFFF}"
79
+ ids.append(doc_id)
80
+
81
+ # Ensure collection is initialized
82
+ if self.collection is None:
83
+ raise RuntimeError("Collection is not initialized.")
84
+ # Add documents to the collection
85
+ self.collection.add(
86
+ documents=texts,
87
+ metadatas=metadatas,
88
+ ids=ids
89
+ )
90
+
91
+ def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
92
+ """Search for most similar documents to query."""
93
+ self._ensure_initialized()
94
+
95
+ # Ensure collection is initialized
96
+ if self.collection is None:
97
+ raise RuntimeError("Collection is not initialized.")
98
+
99
+ # Query the collection
100
+ results = self.collection.query(
101
+ query_texts=[query],
102
+ n_results=top_k
103
+ )
104
+
105
+ # Format results
106
+ hits = []
107
+ documents = results.get('documents')
108
+ metadatas = results.get('metadatas')
109
+ distances = results.get('distances')
110
+ ids = results.get('ids')
111
+
112
+ if documents is not None and metadatas is not None and ids is not None:
113
+ for i in range(len(ids[0])):
114
+ hits.append({
115
+ 'content': documents[0][i] if documents[0] is not None else None,
116
+ 'metadata': metadatas[0][i] if metadatas[0] is not None else None,
117
+ 'score': distances[0][i] if distances and distances[0] is not None else None,
118
+ 'file': metadatas[0][i]['file_path'] if metadatas[0] is not None else None
119
+ })
120
+
121
+ return {
122
+ 'query': query,
123
+ 'results': hits
124
+ }
125
+
126
+ def clear(self):
127
+ """Clear all embeddings from storage."""
128
+ if self.collection:
129
+ self.collection.delete()
@@ -0,0 +1,13 @@
1
+ """LLM package for Contextly."""
2
+
3
+ from .manager import LLMManager
4
+ from .base import LLMProvider
5
+ from .models import ModelManager, ModelRegistry, ModelProvider
6
+
7
+ __all__ = [
8
+ 'LLMManager',
9
+ 'LLMProvider',
10
+ 'ModelManager',
11
+ 'ModelRegistry',
12
+ 'ModelProvider'
13
+ ]
@@ -2,7 +2,7 @@
2
2
  Model management system for various LLM providers.
3
3
  """
4
4
 
5
- from dataclasses import dataclass
5
+ from dataclasses import dataclass, field
6
6
  from enum import Enum
7
7
  from typing import Dict, Any, Optional, List
8
8
  import requests
@@ -25,7 +25,7 @@ class ModelInfo:
25
25
  description: str
26
26
  context_length: int
27
27
  quantization: Optional[str] = None
28
- metadata: Dict[str, Any] = None
28
+ metadata: Dict[str, Any] = field(default_factory=dict)
29
29
 
30
30
  class ModelRegistry:
31
31
  """Registry of available models and their capabilities."""
@@ -37,19 +37,28 @@ class ModelRegistry:
37
37
 
38
38
  def _load_models(self) -> None:
39
39
  """Load model registry from config file."""
40
- if self.config_path.exists():
41
- with open(self.config_path, 'r') as f:
42
- data = json.load(f)
43
- for model_data in data.get('models', []):
44
- info = ModelInfo(
45
- name=model_data['name'],
46
- provider=ModelProvider(model_data['provider']),
47
- description=model_data.get('description', ''),
48
- context_length=model_data.get('context_length', 4096),
49
- quantization=model_data.get('quantization'),
50
- metadata=model_data.get('metadata', {})
51
- )
52
- self.models[model_data['name']] = info
40
+ try:
41
+ if self.config_path.exists():
42
+ with open(self.config_path, 'r') as f:
43
+ try:
44
+ data = json.load(f)
45
+ for model_data in data.get('models', []):
46
+ try:
47
+ info = ModelInfo(
48
+ name=model_data['name'],
49
+ provider=ModelProvider(model_data['provider']),
50
+ description=model_data.get('description', ''),
51
+ context_length=model_data.get('context_length', 4096),
52
+ quantization=model_data.get('quantization'),
53
+ metadata=model_data.get('metadata', {})
54
+ )
55
+ self.models[model_data['name']] = info
56
+ except (KeyError, ValueError) as e:
57
+ print(f"Warning: Skipping invalid model data: {e}")
58
+ except Exception as e:
59
+ print(f"Error loading models from config: {e}")
60
+ except Exception as e:
61
+ print(f"Error accessing config file: {e}")
53
62
 
54
63
  def _save_models(self) -> None:
55
64
  """Save model registry to config file."""
@@ -10,7 +10,7 @@ from .base import LLMProvider
10
10
  class OpenAIProvider(LLMProvider):
11
11
  """LLM provider using OpenAI API."""
12
12
 
13
- DEFAULT_MODEL = "gpt-4-mini"
13
+ DEFAULT_MODEL = "gpt-3.5-turbo"
14
14
 
15
15
  def __init__(self, model: str = DEFAULT_MODEL, api_key: Optional[str] = None):
16
16
  self.model = model
@@ -27,6 +27,8 @@ class OpenAIProvider(LLMProvider):
27
27
  raise RuntimeError("OpenAI API is not configured")
28
28
 
29
29
  try:
30
+ if self.client is None:
31
+ raise RuntimeError("OpenAI client is not initialized. Please provide a valid API key.")
30
32
  response = self.client.chat.completions.create(
31
33
  model=self.model,
32
34
  messages=[{"role": "user", "content": prompt}],
@@ -117,7 +117,6 @@ def test_embedding_engine(sample_repo_path):
117
117
  engine = EmbeddingEngine(sample_repo_path)
118
118
 
119
119
  # Test initialization
120
- engine.initialize()
121
120
  assert engine.model is not None
122
121
  assert engine.db is not None
123
122
 
@@ -1,85 +0,0 @@
1
- """
2
- Core business logic for Contextly commands.
3
- """
4
-
5
- from pathlib import Path
6
- from typing import Dict, Any, Optional
7
-
8
- from .core.analyzer import CodeAnalyzer
9
- from .core.embeddings import EmbeddingEngine
10
- from .core.sync import RepoSync
11
- from .llm.manager import LLMManager
12
-
13
- class Contextly:
14
- """Main class coordinating Contextly's functionality."""
15
-
16
- def __init__(self, repo_path: Optional[Path] = None, model: Optional[str] = None):
17
- self.repo_path = repo_path or Path.cwd()
18
- self.llm_manager = LLMManager(model)
19
- self.analyzer = CodeAnalyzer(self.repo_path)
20
- self.embedding_engine = EmbeddingEngine(self.repo_path)
21
- self.repo_sync = RepoSync(self.repo_path)
22
-
23
- def ask(self, question: str) -> Dict[str, Any]:
24
- """Answer questions about the codebase."""
25
- # Initialize if needed
26
- if not hasattr(self, '_initialized'):
27
- self.embedding_engine.initialize()
28
- self._initialized = True
29
-
30
- # Search for relevant code
31
- search_results = self.embedding_engine.search(question)
32
-
33
- # Build context from search results
34
- context = []
35
- for result in search_results['results']:
36
- context.append(f"From {result['file']}:\n{result['content']}\n")
37
-
38
- # Generate answer using LLM
39
- prompt = f"Question: {question}\n\nContext from codebase:\n{''.join(context)}\n\nAnswer:"
40
- answer = self.analyzer.llm.generate(prompt)
41
-
42
- return {
43
- 'answer': answer,
44
- 'context': search_results
45
- }
46
-
47
- def explain(self, location: str) -> str:
48
- """Explain code at specific location."""
49
- file_path, line_number = location.split(':')
50
- return self.analyzer.explain_code(file_path, int(line_number))
51
-
52
- def search(self, term: str) -> Dict[str, Any]:
53
- """Semantic search across codebase."""
54
- if not hasattr(self, '_initialized'):
55
- self.embedding_engine.initialize()
56
- self._initialized = True
57
-
58
- results = self.embedding_engine.search(term)
59
- return {
60
- 'query': results['query'],
61
- 'results': results['results']
62
- }
63
-
64
- def diff(self, path1: Path, path2: Path) -> Dict[str, Any]:
65
- """Compare configuration files."""
66
- return self.analyzer.compare_configs(path1, path2)
67
-
68
- def sync(self) -> None:
69
- """Build or rebuild the embedding index."""
70
- # Scan repository
71
- files = list(self.repo_sync.scan_files())
72
-
73
- # Build index
74
- index = self.repo_sync.index_repository()
75
-
76
- # Initialize embedding engine
77
- self.embedding_engine.initialize()
78
-
79
- # Convert index to list of documents
80
- docs = []
81
- for file_path, file_data in index.items():
82
- docs.extend(file_data.get('chunks', []))
83
-
84
- # Embed documents
85
- self.embedding_engine.embed_documents(docs)
@@ -1,109 +0,0 @@
1
- """
2
- Vector embedding and similarity search functionality.
3
- """
4
-
5
- from pathlib import Path
6
- import os
7
- from typing import List, Dict, Any, Optional
8
- import chromadb
9
- from chromadb.config import Settings
10
- from sentence_transformers import SentenceTransformer
11
-
12
- class EmbeddingEngine:
13
- """Handles document embedding and similarity search."""
14
-
15
- def __init__(self, repo_path: Path):
16
- self.repo_path = repo_path
17
- self.model = None # Lazy load the model
18
- self.db = None # Initialize ChromaDB client
19
- self.collection = None
20
-
21
- def initialize(self):
22
- """Initialize the embedding model and vector store."""
23
- if self.model is None:
24
- self.model = SentenceTransformer('all-MiniLM-L6-v2')
25
-
26
- if self.db is None:
27
- # Create .contextly directory if it doesn't exist
28
- db_path = self.repo_path / '.contextly'
29
- os.makedirs(db_path, exist_ok=True)
30
-
31
- # Initialize ChromaDB with persistent storage
32
- self.db = chromadb.Client(Settings(
33
- persist_directory=str(db_path),
34
- anonymized_telemetry=False
35
- ))
36
-
37
- # Get or create collection for this repo
38
- self.collection = self.db.get_or_create_collection(
39
- name="contextly_docs",
40
- metadata={"repo_path": str(self.repo_path)}
41
- )
42
-
43
- def embed_documents(self, documents: List[Dict[str, Any]]):
44
- """Convert documents into vector embeddings and store them."""
45
- if not documents:
46
- return
47
-
48
- # Ensure initialization
49
- self.initialize()
50
-
51
- # Prepare documents for ChromaDB
52
- texts = []
53
- metadatas = []
54
- ids = []
55
-
56
- for idx, doc in enumerate(documents):
57
- # Create a meaningful document summary
58
- doc_text = f"{doc.get('type', 'unknown')} - {doc.get('name', '')}:\n{doc.get('content', '')}"
59
- texts.append(doc_text)
60
-
61
- # Store metadata
62
- metadatas.append({
63
- "file_path": str(doc.get("file_path", "")),
64
- "type": doc.get("type", "unknown"),
65
- "name": doc.get("name", ""),
66
- "start_line": doc.get("start_line", 0),
67
- "end_line": doc.get("end_line", 0),
68
- })
69
-
70
- # Generate unique ID
71
- doc_id = f"doc_{idx}_{hash(doc_text) & 0xFFFFFFFF}"
72
- ids.append(doc_id)
73
-
74
- # Add documents to the collection
75
- self.collection.add(
76
- documents=texts,
77
- metadatas=metadatas,
78
- ids=ids
79
- )
80
-
81
- def search(self, query: str, top_k: int = 5) -> Dict[str, Any]:
82
- """Search for most similar documents to query."""
83
- self.initialize()
84
-
85
- # Query the collection
86
- results = self.collection.query(
87
- query_texts=[query],
88
- n_results=top_k
89
- )
90
-
91
- # Format results
92
- hits = []
93
- for i in range(len(results['ids'][0])):
94
- hits.append({
95
- 'content': results['documents'][0][i],
96
- 'metadata': results['metadatas'][0][i],
97
- 'score': results['distances'][0][i] if 'distances' in results else None,
98
- 'file': results['metadatas'][0][i]['file_path']
99
- })
100
-
101
- return {
102
- 'query': query,
103
- 'results': hits
104
- }
105
-
106
- def clear(self):
107
- """Clear all embeddings from storage."""
108
- if self.collection:
109
- self.collection.delete()
File without changes
File without changes
File without changes