contextly 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextly/__init__.py +5 -0
- contextly/app.py +85 -0
- contextly/cli.py +201 -0
- contextly/core/analyzer.py +111 -0
- contextly/core/embeddings.py +109 -0
- contextly/core/sync.py +66 -0
- contextly/llm/base.py +19 -0
- contextly/llm/manager.py +126 -0
- contextly/llm/models.py +204 -0
- contextly/llm/ollama.py +73 -0
- contextly/llm/openai.py +39 -0
- contextly/parsers/base.py +39 -0
- contextly/parsers/config.py +79 -0
- contextly/parsers/javascript.py +122 -0
- contextly/parsers/python.py +60 -0
- contextly-0.1.0.dist-info/METADATA +209 -0
- contextly-0.1.0.dist-info/RECORD +20 -0
- contextly-0.1.0.dist-info/WHEEL +4 -0
- contextly-0.1.0.dist-info/entry_points.txt +2 -0
- contextly-0.1.0.dist-info/licenses/LICENSE +21 -0
contextly/llm/manager.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM manager for code reasoning.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Dict, Any, Optional, List, Union
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from .base import LLMProvider
|
|
9
|
+
from .ollama import OllamaProvider
|
|
10
|
+
from .openai import OpenAIProvider
|
|
11
|
+
from .models import ModelManager, ModelProvider, ModelInfo
|
|
12
|
+
|
|
13
|
+
class LLMManager:
    """Manage LLM providers and generate code explanations.

    The manager keeps provider instances keyed by provider name (``'ollama'``,
    ``'openai'``), remembers which model the caller asked for, builds prompts
    and forwards them to whichever provider is usable.
    """

    def __init__(self, model: Optional[str] = None):
        """Create the manager and set up the providers.

        Args:
            model: Name of the model to use. When omitted, the
                ``CONTEXTLY_MODEL`` environment variable is consulted and
                ``'codellama'`` is the final default.
        """
        self.model_manager = ModelManager()
        self.providers: Dict[str, LLMProvider] = {}
        self.current_model = model or os.getenv('CONTEXTLY_MODEL', 'codellama')
        self._initialize_providers()

    def _initialize_providers(self) -> None:
        """Instantiate every provider that can be set up in this environment."""
        # OllamaProvider's constructor raises RuntimeError when the local
        # server cannot be reached or the model cannot be pulled. That must not
        # prevent the OpenAI fallback below from being registered, so the
        # provider is simply left out of the table in that case.
        try:
            self.providers['ollama'] = OllamaProvider()
        except RuntimeError:
            pass

        # OpenAI is only registered when an API key is present.
        if os.getenv('OPENAI_API_KEY'):
            self.providers['openai'] = OpenAIProvider()

    def get_available_provider(self) -> Optional[LLMProvider]:
        """Return a usable provider, preferring the one that owns the current model.

        If the current model is not in the registry, an Ollama download is
        attempted first. When the model's own provider is usable it is pointed
        at the current model and returned; otherwise the first provider that
        reports itself available is returned unchanged.

        Returns:
            A provider instance, or ``None`` when no provider is available.
        """
        registry = self.model_manager.registry
        model_info = registry.get_model(self.current_model)
        if not model_info:
            # Unknown model: try to fetch it through Ollama, then look again.
            if self.model_manager.download_model(self.current_model, ModelProvider.OLLAMA):
                model_info = registry.get_model(self.current_model)

        if model_info:
            provider = self.providers.get(model_info.provider)
            if provider is not None and provider.is_available():
                # Providers are constructed with their own default model; make
                # the selected model the one that is actually queried.
                if hasattr(provider, 'model'):
                    provider.model = self.current_model
                return provider

        # Fallback to any available provider.
        for provider in self.providers.values():
            if provider.is_available():
                return provider

        return None

    def list_models(self) -> List[Dict[str, Any]]:
        """Return the model descriptions reported by the model manager."""
        return self.model_manager.list_available_models()

    def set_model(self, model_name: str) -> bool:
        """Select the model to use for subsequent requests.

        If the model is not among the listed models, an Ollama download is
        attempted.

        Returns:
            ``True`` when the model was selected, ``False`` when it is unknown
            and could not be downloaded (the current model is left untouched).
        """
        known = any(m['name'] == model_name for m in self.list_models())
        if not known:
            if not self.model_manager.download_model(model_name, ModelProvider.OLLAMA):
                return False
        self.current_model = model_name
        return True

    def explain_code(self, code: str, context: Optional[Dict[str, Any]] = None) -> str:
        """Generate an explanation for a code snippet.

        Args:
            code: Source text to explain.
            context: Optional mapping; the keys ``file_path``, ``type`` and
                ``language`` are included in the prompt when present.

        Raises:
            RuntimeError: If no LLM provider is available.
        """
        provider = self.get_available_provider()
        if not provider:
            raise RuntimeError("No LLM provider available")

        prompt = self._build_explanation_prompt(code, context)
        return provider.generate_response(prompt)

    def compare_configs(self, config1: Dict[str, Any], config2: Dict[str, Any]) -> str:
        """Ask the LLM to compare two configurations and explain the differences.

        Raises:
            RuntimeError: If no LLM provider is available.
        """
        provider = self.get_available_provider()
        if not provider:
            raise RuntimeError("No LLM provider available")

        prompt = self._build_comparison_prompt(config1, config2)
        return provider.generate_response(prompt)

    def _build_explanation_prompt(self, code: str, context: Optional[Dict[str, Any]]) -> str:
        """Build the newline-joined prompt used by :meth:`explain_code`."""
        prompt = [
            "Please explain the following code in a clear and concise way.",
            "Focus on the main purpose and key functionality.",
            "\nCode to explain:\n```",
            code,
            "```\n",
        ]

        if context:
            # Missing keys are rendered as 'unknown' rather than omitted.
            prompt.extend([
                "\nAdditional context:",
                f"File: {context.get('file_path', 'unknown')}",
                f"Type: {context.get('type', 'unknown')}",
                f"Language: {context.get('language', 'unknown')}",
            ])

        return "\n".join(prompt)

    def _build_comparison_prompt(self, config1: Dict[str, Any], config2: Dict[str, Any]) -> str:
        """Build the prompt used by :meth:`compare_configs`.

        Both configurations are embedded using their default string form.
        """
        return f"""Please compare these two configurations and explain key differences and their potential impact:

Configuration 1:
```
{config1}
```

Configuration 2:
```
{config2}
```

Please focus on:
1. Missing or different keys
2. Value type mismatches
3. Potential issues or risks
4. Recommended actions"""
|
contextly/llm/models.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model management system for various LLM providers.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Dict, Any, Optional, List
|
|
8
|
+
import requests
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
class ModelProvider(str, Enum):
    """Supported model providers.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (``ModelProvider.OLLAMA == "ollama"``).
    """

    OLLAMA = "ollama"
    OPENAI = "openai"
    HUGGINGFACE = "huggingface"
    LOCALAI = "localai"
    CUSTOM = "custom"


@dataclass
class ModelInfo:
    """Information about an LLM model.

    ``metadata`` may be omitted or passed as ``None``; in both cases it is
    replaced by a fresh empty dict so consumers can always treat it as a
    mapping and instances never share one dict.
    """

    name: str
    provider: ModelProvider
    description: str
    context_length: int
    quantization: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self) -> None:
        """Normalise a missing ``metadata`` value to an empty dict."""
        if self.metadata is None:
            self.metadata = {}
|
|
29
|
+
|
|
30
|
+
class ModelRegistry:
    """Registry of known models, persisted as JSON on disk."""

    def __init__(self, config_path: Optional[Path] = None):
        """Load the registry.

        Args:
            config_path: Location of the JSON file. Defaults to
                ``~/.contextly/models.json``.
        """
        self.config_path = config_path or Path.home() / ".contextly" / "models.json"
        self.models: Dict[str, ModelInfo] = {}
        self._load_models()

    def _load_models(self) -> None:
        """Populate ``self.models`` from the config file, if it exists."""
        if not self.config_path.exists():
            return

        # JSON is read as UTF-8 so the file is portable across platforms.
        with open(self.config_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for model_data in data.get('models', []):
            self.models[model_data['name']] = ModelInfo(
                name=model_data['name'],
                provider=ModelProvider(model_data['provider']),
                description=model_data.get('description', ''),
                context_length=model_data.get('context_length', 4096),
                quantization=model_data.get('quantization'),
                # A stored null is treated like a missing key.
                metadata=model_data.get('metadata') or {},
            )

    def _save_models(self) -> None:
        """Write every registered model to the config file.

        The parent directory is created when it does not exist yet.
        """
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            'models': [
                {
                    'name': info.name,
                    'provider': info.provider,
                    'description': info.description,
                    'context_length': info.context_length,
                    'quantization': info.quantization,
                    'metadata': info.metadata,
                }
                for info in self.models.values()
            ]
        }
        with open(self.config_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)

    def register_model(self, model_info: ModelInfo) -> None:
        """Add (or replace) a model under its name and persist the registry."""
        self.models[model_info.name] = model_info
        self._save_models()

    def get_model(self, name: str) -> Optional[ModelInfo]:
        """Return the model registered under ``name``, or ``None``."""
        return self.models.get(name)

    def list_models(self, provider: Optional[ModelProvider] = None) -> List[ModelInfo]:
        """Return the registered models, optionally only those of ``provider``."""
        if provider:
            return [m for m in self.models.values() if m.provider == provider]
        return list(self.models.values())

    def remove_model(self, name: str) -> bool:
        """Remove a model and persist the change.

        Returns:
            ``True`` if the model existed and was removed, ``False`` otherwise.
        """
        if name not in self.models:
            return False
        del self.models[name]
        self._save_models()
        return True
|
|
94
|
+
|
|
95
|
+
class ModelManager:
    """Download models from their providers and record them in the registry."""

    OLLAMA_API = "http://localhost:11434/api"
    HF_API = "https://huggingface.co/api"

    # Seconds to wait for short metadata requests before giving up.
    REQUEST_TIMEOUT = 10
    # (connect, read) for model pulls: fail fast when the server is
    # unreachable, but allow the download itself to take as long as it needs.
    PULL_TIMEOUT = (5, None)

    def __init__(self):
        self.registry = ModelRegistry()

    def download_model(self, name: str, provider: ModelProvider, **kwargs) -> bool:
        """Download a model from its provider.

        Args:
            name: Model name as understood by the provider.
            provider: Ollama, Hugging Face and custom providers are supported.
            **kwargs: Forwarded to the Hugging Face and custom handlers.

        Returns:
            ``True`` on success. Any failure, including an unsupported
            provider, is printed and reported as ``False``.
        """
        try:
            if provider == ModelProvider.OLLAMA:
                return self._download_ollama_model(name)
            if provider == ModelProvider.HUGGINGFACE:
                return self._download_hf_model(name, **kwargs)
            if provider == ModelProvider.CUSTOM:
                return self._download_custom_model(name, **kwargs)
            raise ValueError(f"Unsupported provider: {provider}")
        except Exception as e:
            print(f"Failed to download model {name}: {e}")
            return False

    def _download_ollama_model(self, name: str) -> bool:
        """Pull a model through the local Ollama server and register it."""
        try:
            response = requests.post(
                f"{self.OLLAMA_API}/pull",
                # Ask for a single, non-streamed reply so a failed pull is
                # reflected in the HTTP status that is checked below.
                json={"name": name, "stream": False},
                timeout=self.PULL_TIMEOUT,
            )
            if response.status_code != 200:
                return False

            self.registry.register_model(ModelInfo(
                name=name,
                provider=ModelProvider.OLLAMA,
                description=f"Ollama model: {name}",
                context_length=8192,  # Default for most Ollama models
            ))
            return True
        except (requests.RequestException, OSError, ValueError):
            return False

    def _download_hf_model(self, name: str, token: Optional[str] = None) -> bool:
        """Fetch model information from the Hugging Face API and register it.

        Args:
            name: Model identifier on Hugging Face.
            token: Optional access token, sent as a bearer token.
        """
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        try:
            response = requests.get(
                f"{self.HF_API}/models/{name}",
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                return False

            model_info = response.json()
            self.registry.register_model(ModelInfo(
                name=name,
                provider=ModelProvider.HUGGINGFACE,
                description=model_info.get('description', ''),
                context_length=model_info.get('max_position_embeddings', 4096),
                metadata=model_info,
            ))
            return True
        except (requests.RequestException, OSError, ValueError):
            return False

    def _download_custom_model(self, name: str, url: str, **kwargs) -> bool:
        """Register a custom model located at ``url``.

        Nothing is fetched here; the URL and any extra keyword arguments are
        stored in the model's metadata.
        """
        try:
            self.registry.register_model(ModelInfo(
                name=name,
                provider=ModelProvider.CUSTOM,
                description=kwargs.get('description', f"Custom model: {name}"),
                context_length=kwargs.get('context_length', 4096),
                metadata={'url': url, **kwargs},
            ))
            return True
        except (OSError, ValueError, TypeError):
            return False

    def list_available_models(self) -> List[Dict[str, Any]]:
        """List models reported by Ollama followed by the registered models.

        The Ollama query is best-effort: when the server is unreachable or
        replies with something unexpected, only registered models are returned.
        """
        models: List[Dict[str, Any]] = []

        try:
            response = requests.get(f"{self.OLLAMA_API}/tags", timeout=self.REQUEST_TIMEOUT)
            if response.status_code == 200:
                models.extend(
                    {
                        'name': model['name'],
                        'provider': 'ollama',
                        'status': 'available',
                    }
                    for model in response.json().get('models', [])
                )
        except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError):
            # Deliberately ignored: a missing local server must not prevent
            # registered models from being listed.
            pass

        for model in self.registry.list_models():
            models.append({
                'name': model.name,
                'provider': model.provider,
                'description': model.description,
                'status': 'registered',
            })

        return models
|
contextly/llm/ollama.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration with local Ollama models.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import requests
|
|
7
|
+
from typing import Dict, Any, Optional
|
|
8
|
+
from .base import LLMProvider
|
|
9
|
+
|
|
10
|
+
class OllamaProvider(LLMProvider):
    """LLM provider backed by a local Ollama server."""

    DEFAULT_MODEL = "codellama"
    BASE_URL = "http://localhost:11434/api"

    # Seconds to wait for the quick "is the server there" requests.
    STATUS_TIMEOUT = 5
    # (connect, read): fail fast when the server is unreachable, but do not
    # cut off a long-running pull or generation.
    LONG_TIMEOUT = (5, None)

    def __init__(self, model: str = DEFAULT_MODEL):
        """Remember the model name and make sure the server has it.

        Raises:
            RuntimeError: If the server cannot be reached or the pull fails.
        """
        self.model = model
        self._ensure_model()

    @staticmethod
    def _with_tag(name: str) -> str:
        """Return ``name`` with an explicit tag, defaulting to ``:latest``.

        Used so an untagged request such as ``codellama`` still matches a
        listed ``codellama:latest``.
        """
        return name if ':' in name else f"{name}:latest"

    def _ensure_model(self) -> None:
        """Pull ``self.model`` unless the server already lists it.

        Nothing is done when the tag listing does not answer with HTTP 200.

        Raises:
            RuntimeError: On any connection error or failed pull.
        """
        try:
            response = requests.get(f"{self.BASE_URL}/tags", timeout=self.STATUS_TIMEOUT)
            if response.status_code != 200:
                return

            installed = {
                self._with_tag(entry["name"])
                for entry in response.json().get("models", [])
            }
            if self._with_tag(self.model) in installed:
                return

            print(f"Downloading {self.model}...")
            pull_response = requests.post(
                f"{self.BASE_URL}/pull",
                # Non-streamed so a failed pull shows up in the status code.
                json={"name": self.model, "stream": False},
                timeout=self.LONG_TIMEOUT,
            )
            if pull_response.status_code != 200:
                raise RuntimeError(f"Failed to pull model: {pull_response.text}")
            print(f"{self.model} ready!")
        except Exception as e:
            raise RuntimeError(f"Failed to set up Ollama: {str(e)}") from e

    def is_available(self) -> bool:
        """Return ``True`` when the Ollama server answers the tag listing."""
        try:
            response = requests.get(f"{self.BASE_URL}/tags", timeout=self.STATUS_TIMEOUT)
        except requests.RequestException:
            return False
        return response.status_code == 200

    def generate_response(self, prompt: str, **kwargs) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text sent to the model.
            **kwargs: ``temperature`` and ``top_p`` (defaults 0.2 and 0.9) and
                an optional ``options`` dict are sent as model options; any
                other key is added to the request body as-is and may override
                the defaults set here.

        Raises:
            RuntimeError: If the request fails or the server does not answer
                with HTTP 200.
        """
        try:
            extra = dict(kwargs)
            # Sampling parameters belong in the request's "options" object.
            options = {
                "temperature": extra.pop("temperature", 0.2),  # Lower temp for code
                "top_p": extra.pop("top_p", 0.9),
            }
            options.update(extra.pop("options", None) or {})

            params = {
                "model": self.model,
                "prompt": prompt,
                "stream": False,
                "system": "You are an expert programmer helping to understand and explain code.",
                "options": options,
                **extra,
            }

            response = requests.post(
                f"{self.BASE_URL}/generate",
                json=params,
                timeout=self.LONG_TIMEOUT,
            )

            if response.status_code != 200:
                raise RuntimeError(f"Ollama API error: {response.status_code} - {response.text}")
            return response.json()["response"]

        except Exception as e:
            raise RuntimeError(f"Failed to generate response: {str(e)}") from e
|
contextly/llm/openai.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration with OpenAI API as a fallback.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Dict, Any, Optional
|
|
7
|
+
from openai import OpenAI
|
|
8
|
+
from .base import LLMProvider
|
|
9
|
+
|
|
10
|
+
class OpenAIProvider(LLMProvider):
    """LLM provider using the OpenAI chat completions API."""

    # NOTE(review): the previous default, "gpt-4-mini", does not look like a
    # published OpenAI model id; "gpt-4o-mini" is assumed to be what was meant.
    DEFAULT_MODEL = "gpt-4o-mini"

    def __init__(self, model: str = DEFAULT_MODEL, api_key: Optional[str] = None):
        """Create the provider.

        Args:
            model: Model id sent with every request.
            api_key: API key; falls back to the ``OPENAI_API_KEY`` environment
                variable. Without a key no client is created.
        """
        self.model = model
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key) if self.api_key else None

    def is_available(self) -> bool:
        """Return ``True`` when an API key and a client are configured."""
        return bool(self.client and self.api_key)

    def generate_response(self, prompt: str, **kwargs) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Text of the user message.
            **kwargs: Forwarded to ``chat.completions.create``.

        Returns:
            The stripped text of the first choice; an empty string when the
            message carries no text content.

        Raises:
            RuntimeError: If the provider is not configured or the call fails.
        """
        if not self.is_available():
            raise RuntimeError("OpenAI API is not configured")

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                **kwargs
            )
            # The content can be None; avoid calling .strip() on it.
            content = response.choices[0].message.content
            return (content or "").strip()
        except Exception as e:
            raise RuntimeError(f"OpenAI API error: {str(e)}") from e
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base parser interface and utilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Any, List, Optional
|
|
8
|
+
|
|
9
|
+
class BaseParser(ABC):
    """Abstract base class for all file parsers.

    Subclasses list the extensions they handle (with the leading dot) in
    ``supported_extensions`` and implement :meth:`parse`.
    """

    def __init__(self):
        self.supported_extensions: List[str] = []

    @abstractmethod
    def parse(self, file_path: Path) -> Dict[str, Any]:
        """Parse a file and return structured content."""
        pass

    def can_parse(self, file_path: Path) -> bool:
        """Check if this parser can handle the given file.

        The extension is compared case-insensitively. A dotfile whose whole
        name is a supported extension (for example ``.env``) is accepted as
        well, because ``Path('.env').suffix`` is the empty string.
        """
        supported = {ext.lower() for ext in self.supported_extensions}
        suffix = file_path.suffix.lower()
        if suffix:
            return suffix in supported
        return file_path.name.lower() in supported
|
|
23
|
+
|
|
24
|
+
class ChunkMetadata:
    """Describe where a chunk of code or text comes from.

    Attributes:
        file_path: File the chunk was taken from.
        start_line: First line of the chunk.
        end_line: Last line of the chunk.
        content_type: Kind of content held by the chunk.
        symbols: Symbol names associated with the chunk; an empty list when
            none (or an empty value) were supplied.
    """

    def __init__(
        self,
        file_path: Path,
        start_line: int,
        end_line: int,
        content_type: str,
        symbols: Optional[List[str]] = None
    ):
        # Location of the chunk.
        self.file_path = file_path
        self.start_line, self.end_line = start_line, end_line
        # What the chunk contains.
        self.content_type = content_type
        self.symbols = symbols if symbols else []
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parser for configuration files (JSON, YAML, TOML, ENV).
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import yaml
|
|
7
|
+
import toml
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Any
|
|
10
|
+
from dotenv import dotenv_values
|
|
11
|
+
from .base import BaseParser
|
|
12
|
+
|
|
13
|
+
class ConfigParser(BaseParser):
    """Parser for JSON, YAML, TOML and dotenv configuration files."""

    def __init__(self):
        super().__init__()
        self.supported_extensions = ['.json', '.yml', '.yaml', '.toml', '.env']
        self.file_type_map = {
            'env': 'env',
            'json': 'json',
            'yml': 'yaml',
            'yaml': 'yaml',
            'toml': 'toml'
        }

    def can_parse(self, file_path: Path) -> bool:
        """Accept supported extensions and any file named ``.env*``.

        Dotfiles such as ``.env`` or ``.env.local`` have no matching suffix,
        so they are recognised by name, the same way :meth:`parse` does.
        """
        return file_path.name.startswith('.env') or super().can_parse(file_path)

    @staticmethod
    def _parse_env_text(content: str) -> Dict[str, Any]:
        """Parse ``KEY=VALUE`` lines, skipping blanks, comments and lines without ``=``."""
        values: Dict[str, Any] = {}
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, _, value = line.partition('=')
            values[key.strip()] = value.strip()
        return values

    def _load(self, file_path: Path, content: str) -> Any:
        """Parse ``content`` with the parser matching the file's extension or name."""
        ext = file_path.suffix.lower()
        if ext == '.json':
            parsed = json.loads(content)
        elif ext in ('.yml', '.yaml'):
            parsed = yaml.safe_load(content)
        elif ext == '.toml':
            parsed = toml.loads(content)
        elif file_path.name.startswith('.env'):
            parsed = self._parse_env_text(content)
        elif ext == '.env':
            parsed = dict(dotenv_values(file_path))
        else:
            parsed = {}
        # An empty YAML document loads as None; report it as an empty mapping.
        return {} if parsed is None else parsed

    @staticmethod
    def _strip_top_level(parsed: Any) -> Any:
        """Strip whitespace from top-level string keys and values.

        Non-string values (numbers, lists, nested mappings, ``None``) and
        non-mapping documents are returned unchanged.
        """
        if not isinstance(parsed, dict):
            return parsed
        return {
            (key.strip() if isinstance(key, str) else key):
                (value.strip() if isinstance(value, str) else value)
            for key, value in parsed.items()
        }

    def parse(self, file_path: Path) -> Dict[str, Any]:
        """Parse a configuration file based on its extension or name.

        Returns:
            On success a dict with ``file_type``, the raw ``content``, the
            ``parsed`` data (top-level strings stripped) and a single-element
            ``chunks`` list. On failure a dict with ``error``, whatever
            ``content`` was read, and an empty ``chunks`` list.
        """
        content = ''
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()

            parsed = self._load(file_path, content)

            if file_path.name.startswith('.env'):
                file_type = 'env'
            else:
                file_type = file_path.suffix.lstrip('.').lower()

            return {
                'file_type': file_type,
                'content': content,
                'parsed': self._strip_top_level(parsed),
                'chunks': [{
                    'type': 'config',
                    'content': content,
                    'parsed': parsed
                }]
            }

        except Exception as e:
            # Parse failures are reported in the result rather than raised.
            return {
                'error': f'Failed to parse {file_path}: {str(e)}',
                'content': content,
                'chunks': []
            }
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parser for JavaScript and TypeScript files.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Any, List, Optional, Match
|
|
8
|
+
from .base import BaseParser
|
|
9
|
+
|
|
10
|
+
class JavaScriptParser(BaseParser):
    """Parser for JavaScript and TypeScript files using regex patterns.

    This is a heuristic scanner, not a real parser: braces inside string
    literals or comments are counted like any other brace.
    """

    def __init__(self):
        super().__init__()
        self.supported_extensions = ['.js', '.ts', '.jsx', '.tsx']

        # Regular expressions for parsing
        self.patterns = {
            'function': re.compile(
                r'(?:export\s+)?'  # Optional export
                r'(?:async\s+)?'   # Optional async
                r'function\s+'     # Function keyword is required for top-level functions
                r'(\w+)'           # Function name
                r'\s*'
                r'\([^)]*\)'       # Parameters
                r'\s*{',           # Opening brace
                re.MULTILINE
            ),
            'class': re.compile(
                r'(?:export\s+)?'          # Optional export
                r'class\s+'                # Class keyword
                r'(\w+)'                   # Class name
                r'(?:\s+extends\s+\w+)?'   # Optional inheritance
                r'\s*{',                   # Opening brace
                re.MULTILINE
            ),
            'import': re.compile(
                r'import\s+'
                # Named imports or default import. Non-greedy so that
                # consecutive imports written without semicolons are matched
                # one by one instead of being swallowed into a single match.
                r'(?:{[^}]+}|[^;]+?)'
                r'\s+from\s+'
                r'[\'"]([^\'"]+)[\'"]',  # Module path
                re.MULTILINE
            ),
            'export': re.compile(
                r'export\s+'
                r'(?:{[^}]+}|[^;]+)',  # Named exports or default export
                re.MULTILINE
            )
        }

    def _find_block_end(self, content: str, start: int) -> int:
        """Return the index of the brace closing the first block at or after ``start``.

        Returns ``len(content)`` when the braces never balance.
        """
        level = 0
        for i in range(start, len(content)):
            char = content[i]
            if char == '{':
                level += 1
            elif char == '}':
                level -= 1
                if level == 0:
                    return i
        return len(content)

    def _get_line_number(self, content: str, pos: int) -> int:
        """Convert a character position to a 1-based line number."""
        return content.count('\n', 0, pos) + 1

    def _extract_code_block(self, content: str, match: Match[str], context_lines: int = 2) -> Dict[str, Any]:
        """Extract the block introduced by ``match`` with surrounding context.

        Args:
            content: Full file text.
            match: Match of a declaration pattern.
            context_lines: Lines of context kept before and after the block.

        Returns:
            Dict with 1-based ``start_line`` and ``end_line`` of the block and
            the ``content`` of the block including the context lines.
        """
        start_pos = match.start()
        # Balance braces from the body's opening brace (the last '{' in the
        # match) so braces in the parameter list, e.g. `function f(a = {}) {`,
        # do not end the block early.
        brace_pos = content.rfind('{', start_pos, match.end())
        scan_from = brace_pos if brace_pos != -1 else start_pos
        block_end = self._find_block_end(content, scan_from)

        start_line = self._get_line_number(content, start_pos)
        end_line = self._get_line_number(content, block_end)

        lines = content.split('\n')
        context_start = max(0, start_line - context_lines - 1)
        context_end = min(len(lines), end_line + context_lines)
        block_content = '\n'.join(lines[context_start:context_end])

        return {
            'start_line': start_line,
            'end_line': end_line,
            'content': block_content
        }

    def parse(self, file_path: Path) -> Dict[str, Any]:
        """Parse a JavaScript/TypeScript file and extract structure and symbols.

        Returns:
            Dict with ``file_type``, the raw ``content``, one chunk per
            function and class (functions first, each in source order), the
            declared ``symbols`` and the imported module paths.
        """
        # Read as UTF-8 so the result does not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        chunks: List[Dict[str, Any]] = []
        symbols: List[str] = []

        for pattern_name in ('function', 'class'):
            for match in self.patterns[pattern_name].finditer(content):
                name = match.group(1)
                block_info = self._extract_code_block(content, match)
                chunks.append({
                    'type': pattern_name,
                    'name': name,
                    'start_line': block_info['start_line'],
                    'end_line': block_info['end_line'],
                    'content': block_info['content'],
                })
                symbols.append(name)

        imports: List[str] = [
            match.group(1) for match in self.patterns['import'].finditer(content)
        ]

        return {
            'file_type': 'javascript',  # Default type for all JS/TS files for now
            'content': content,
            'chunks': chunks,
            'symbols': symbols,
            'imports': imports
        }
|