ai_coding_assistant-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
  2. ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
  3. ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
  4. ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
  5. ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
  6. coding_assistant/__init__.py +3 -0
  7. coding_assistant/__main__.py +19 -0
  8. coding_assistant/cli/__init__.py +1 -0
  9. coding_assistant/cli/app.py +158 -0
  10. coding_assistant/cli/commands/__init__.py +19 -0
  11. coding_assistant/cli/commands/ask.py +178 -0
  12. coding_assistant/cli/commands/config.py +438 -0
  13. coding_assistant/cli/commands/diagram.py +267 -0
  14. coding_assistant/cli/commands/document.py +410 -0
  15. coding_assistant/cli/commands/explain.py +192 -0
  16. coding_assistant/cli/commands/fix.py +249 -0
  17. coding_assistant/cli/commands/index.py +162 -0
  18. coding_assistant/cli/commands/refactor.py +245 -0
  19. coding_assistant/cli/commands/search.py +182 -0
  20. coding_assistant/cli/commands/serve_docs.py +128 -0
  21. coding_assistant/cli/repl.py +381 -0
  22. coding_assistant/cli/theme.py +90 -0
  23. coding_assistant/codebase/__init__.py +1 -0
  24. coding_assistant/codebase/crawler.py +93 -0
  25. coding_assistant/codebase/parser.py +266 -0
  26. coding_assistant/config/__init__.py +25 -0
  27. coding_assistant/config/config_manager.py +615 -0
  28. coding_assistant/config/settings.py +82 -0
  29. coding_assistant/context/__init__.py +19 -0
  30. coding_assistant/context/chunker.py +443 -0
  31. coding_assistant/context/enhanced_retriever.py +322 -0
  32. coding_assistant/context/hybrid_search.py +311 -0
  33. coding_assistant/context/ranker.py +355 -0
  34. coding_assistant/context/retriever.py +119 -0
  35. coding_assistant/context/window.py +362 -0
  36. coding_assistant/documentation/__init__.py +23 -0
  37. coding_assistant/documentation/agents/__init__.py +27 -0
  38. coding_assistant/documentation/agents/coordinator.py +510 -0
  39. coding_assistant/documentation/agents/module_documenter.py +111 -0
  40. coding_assistant/documentation/agents/synthesizer.py +139 -0
  41. coding_assistant/documentation/agents/task_delegator.py +100 -0
  42. coding_assistant/documentation/decomposition/__init__.py +21 -0
  43. coding_assistant/documentation/decomposition/context_preserver.py +477 -0
  44. coding_assistant/documentation/decomposition/module_detector.py +302 -0
  45. coding_assistant/documentation/decomposition/partitioner.py +621 -0
  46. coding_assistant/documentation/generators/__init__.py +14 -0
  47. coding_assistant/documentation/generators/dataflow_generator.py +440 -0
  48. coding_assistant/documentation/generators/diagram_generator.py +511 -0
  49. coding_assistant/documentation/graph/__init__.py +13 -0
  50. coding_assistant/documentation/graph/dependency_builder.py +468 -0
  51. coding_assistant/documentation/graph/module_analyzer.py +475 -0
  52. coding_assistant/documentation/writers/__init__.py +11 -0
  53. coding_assistant/documentation/writers/markdown_writer.py +322 -0
  54. coding_assistant/embeddings/__init__.py +0 -0
  55. coding_assistant/embeddings/generator.py +89 -0
  56. coding_assistant/embeddings/store.py +187 -0
  57. coding_assistant/exceptions/__init__.py +50 -0
  58. coding_assistant/exceptions/base.py +110 -0
  59. coding_assistant/exceptions/llm.py +249 -0
  60. coding_assistant/exceptions/recovery.py +263 -0
  61. coding_assistant/exceptions/storage.py +213 -0
  62. coding_assistant/exceptions/validation.py +230 -0
  63. coding_assistant/llm/__init__.py +1 -0
  64. coding_assistant/llm/client.py +277 -0
  65. coding_assistant/llm/gemini_client.py +181 -0
  66. coding_assistant/llm/groq_client.py +160 -0
  67. coding_assistant/llm/prompts.py +98 -0
  68. coding_assistant/llm/together_client.py +160 -0
  69. coding_assistant/operations/__init__.py +13 -0
  70. coding_assistant/operations/differ.py +369 -0
  71. coding_assistant/operations/generator.py +347 -0
  72. coding_assistant/operations/linter.py +430 -0
  73. coding_assistant/operations/validator.py +406 -0
  74. coding_assistant/storage/__init__.py +9 -0
  75. coding_assistant/storage/database.py +363 -0
  76. coding_assistant/storage/session.py +231 -0
  77. coding_assistant/utils/__init__.py +31 -0
  78. coding_assistant/utils/cache.py +477 -0
  79. coding_assistant/utils/hardware.py +132 -0
  80. coding_assistant/utils/keystore.py +206 -0
  81. coding_assistant/utils/logger.py +32 -0
  82. coding_assistant/utils/progress.py +311 -0
  83. coding_assistant/validation/__init__.py +13 -0
  84. coding_assistant/validation/files.py +305 -0
  85. coding_assistant/validation/inputs.py +335 -0
  86. coding_assistant/validation/params.py +280 -0
  87. coding_assistant/validation/sanitizers.py +243 -0
  88. coding_assistant/vcs/__init__.py +5 -0
  89. coding_assistant/vcs/git.py +269 -0
coding_assistant/context/ranker.py
@@ -0,0 +1,355 @@
+ """Context ranking with language-aware scoring."""
+
+ import math
+ from typing import List, Dict, Optional
+ from datetime import datetime, timedelta
+ from pathlib import Path
+
+
+ class ContextRanker:
+     """
+     Rank retrieved code chunks based on multiple factors.
+
+     Scoring factors:
+     - Semantic similarity (40%)
+     - File proximity (20%)
+     - Dependency distance (20%)
+     - Recency (10%)
+     - Code popularity/references (10%)
+     """
+
+     def __init__(self, dependency_graph=None):
+         """
+         Initialize the context ranker.
+
+         Args:
+             dependency_graph: Optional dependency graph for file relationships
+         """
+         self.dependency_graph = dependency_graph
+
+         # Scoring weights
+         self.weights = {
+             'similarity': 0.4,
+             'file_proximity': 0.2,
+             'dependency': 0.2,
+             'recency': 0.1,
+             'popularity': 0.1
+         }
+
+     def rank(self, chunks: List[Dict], query: str,
+              current_file: Optional[str] = None,
+              language: Optional[str] = None) -> List[Dict]:
+         """
+         Rank chunks based on relevance to query and context.
+
+         Args:
+             chunks: List of chunks with metadata (must have 'id', 'similarity', etc.)
+             query: The user's query
+             current_file: Current file being edited (for proximity bonus)
+             language: Programming language (for language-specific heuristics)
+
+         Returns:
+             Ranked list of chunks with scores
+         """
+         scored_chunks = []
+
+         for chunk in chunks:
+             score = self._calculate_score(chunk, query, current_file, language)
+             chunk['rank_score'] = score
+             scored_chunks.append(chunk)
+
+         # Sort by rank score (descending)
+         ranked = sorted(scored_chunks, key=lambda x: x['rank_score'], reverse=True)
+
+         return ranked
+
+     def _calculate_score(self, chunk: Dict, query: str,
+                          current_file: Optional[str],
+                          language: Optional[str]) -> float:
+         """Calculate overall relevance score for a chunk."""
+         score = 0.0
+
+         # 1. Semantic similarity (from vector/hybrid search)
+         similarity = chunk.get('similarity', chunk.get('vector_score', 0.0))
+         score += similarity * self.weights['similarity']
+
+         # 2. File proximity
+         if current_file and 'file_path' in chunk:
+             proximity_score = self._file_proximity_score(chunk['file_path'], current_file)
+             score += proximity_score * self.weights['file_proximity']
+
+         # 3. Dependency distance
+         if current_file and self.dependency_graph and 'file_path' in chunk:
+             dep_score = self._dependency_score(chunk['file_path'], current_file)
+             score += dep_score * self.weights['dependency']
+
+         # 4. Recency
+         if 'last_modified' in chunk:
+             recency_score = self._recency_score(chunk['last_modified'])
+             score += recency_score * self.weights['recency']
+
+         # 5. Popularity
+         if 'reference_count' in chunk:
+             popularity_score = self._popularity_score(chunk['reference_count'])
+             score += popularity_score * self.weights['popularity']
+
+         # 6. Language-specific boosts
+         if language:
+             lang_boost = self._language_specific_boost(chunk, query, language)
+             score += lang_boost
+
+         return score
+
+     def _file_proximity_score(self, file_path: str, current_file: str) -> float:
+         """
+         Score based on how close two files are in the directory structure.
+
+         Returns:
+             Score between 0 and 1
+         """
+         if file_path == current_file:
+             return 1.0
+
+         # Convert to Path objects
+         path1 = Path(file_path)
+         path2 = Path(current_file)
+
+         # Same directory: high score
+         if path1.parent == path2.parent:
+             return 0.8
+
+         # Check how many directory levels apart
+         try:
+             # Get the chunk's path relative to the current file's directory
+             rel_path = path1.relative_to(path2.parent)
+             levels = len(rel_path.parts) - 1
+             # Closer = higher score
+             return max(0.0, 1.0 - (levels * 0.2))
+         except ValueError:
+             # Not in the same subtree; fall back to counting shared leading directories
+             common_parts = 0
+             for p1, p2 in zip(path1.parts, path2.parts):
+                 if p1 == p2:
+                     common_parts += 1
+                 else:
+                     break
+
+             # More common directories = higher score
+             return min(1.0, common_parts * 0.15)
+
+     def _dependency_score(self, file_path: str, current_file: str) -> float:
+         """
+         Score based on dependency distance in the import graph.
+
+         Returns:
+             Score between 0 and 1
+         """
+         if not self.dependency_graph:
+             return 0.0
+
+         try:
+             # Get the shortest-path distance in the dependency graph
+             distance = self.dependency_graph.get_distance(current_file, file_path)
+
+             if distance == 0:
+                 return 1.0
+             elif distance == 1:
+                 # Direct dependency
+                 return 0.8
+             elif distance == 2:
+                 # Second-degree dependency
+                 return 0.5
+             else:
+                 # Further away
+                 return max(0.0, 1.0 - (distance * 0.2))
+         except Exception:
+             return 0.0
+
+     def _recency_score(self, last_modified: datetime) -> float:
+         """
+         Score based on how recently the file was modified.
+
+         Returns:
+             Score between 0 and 1
+         """
+         if isinstance(last_modified, str):
+             try:
+                 last_modified = datetime.fromisoformat(last_modified)
+             except ValueError:
+                 return 0.5  # Default if the timestamp can't be parsed
+
+         now = datetime.now()
+         age = now - last_modified
+
+         # Files modified in the last day: 1.0
+         if age < timedelta(days=1):
+             return 1.0
+         # Last week: 0.8
+         elif age < timedelta(weeks=1):
+             return 0.8
+         # Last month: 0.5
+         elif age < timedelta(days=30):
+             return 0.5
+         # Last 3 months: 0.3
+         elif age < timedelta(days=90):
+             return 0.3
+         # Older: decay linearly from 0.3 at ~3 months down to 0.0 at one year
+         else:
+             days_old = age.days
+             return max(0.0, 0.3 * (365 - days_old) / 275)
+
+     def _popularity_score(self, reference_count: int) -> float:
+         """
+         Score based on how many times the code is referenced.
+
+         Returns:
+             Score between 0 and 1
+         """
+         # Logarithmic scaling: 0 refs: 0.0, 1 ref: ~0.1,
+         # 10 refs: ~0.35, 100 refs: ~0.67, 1000+ refs: 1.0
+         if reference_count == 0:
+             return 0.0
+
+         return min(1.0, math.log10(reference_count + 1) / 3)
+
+     def _language_specific_boost(self, chunk: Dict, query: str, language: str) -> float:
+         """
+         Apply language-specific heuristics for ranking.
+
+         Returns:
+             Boost score (can be positive or negative, typically 0.0-0.2)
+         """
+         boost = 0.0
+         file_path = chunk.get('file_path', '').lower()
+         query_lower = query.lower()
+
+         if language == 'python':
+             # Boost test files if the query mentions "test"
+             if 'test' in query_lower and 'test_' in file_path:
+                 boost += 0.15
+
+             # Boost __init__.py for module/package questions
+             if '__init__.py' in file_path:
+                 if any(word in query_lower for word in ['module', 'package', 'import']):
+                     boost += 0.1
+
+             # Boost setup.py and pyproject.toml for dependency questions
+             if 'setup.py' in file_path or 'pyproject.toml' in file_path:
+                 if any(word in query_lower for word in ['dependency', 'install', 'package']):
+                     boost += 0.15
+
+             # Boost main.py and app.py for entry-point questions
+             if 'main.py' in file_path or 'app.py' in file_path:
+                 if any(word in query_lower for word in ['start', 'entry', 'run', 'main']):
+                     boost += 0.1
+
+         elif language in ('javascript', 'typescript'):
+             # Boost index.js, main.js, app.js for entry points
+             if any(name in file_path for name in ['index.js', 'main.js', 'app.js', 'index.ts', 'main.ts']):
+                 if any(word in query_lower for word in ['start', 'entry', 'run', 'main']):
+                     boost += 0.1
+
+             # Boost .tsx/.jsx for component questions
+             if file_path.endswith(('.tsx', '.jsx')):
+                 if any(word in query_lower for word in ['component', 'render', 'ui', 'view']):
+                     boost += 0.15
+
+             # Boost package.json for dependency questions
+             if 'package.json' in file_path:
+                 if any(word in query_lower for word in ['dependency', 'install', 'package', 'npm']):
+                     boost += 0.15
+
+             # Boost test files
+             if any(pattern in file_path for pattern in ['.test.', '.spec.', '__tests__']):
+                 if 'test' in query_lower:
+                     boost += 0.15
+
+             # Boost config files for configuration questions
+             if any(pattern in file_path for pattern in ['config.', '.config.', 'webpack', 'vite']):
+                 if any(word in query_lower for word in ['config', 'setup', 'build']):
+                     boost += 0.1
+
+         # Generic boosts (language-agnostic)
+
+         # Boost README files for overview questions
+         if 'readme' in file_path:
+             if any(word in query_lower for word in ['what', 'overview', 'about', 'intro']):
+                 boost += 0.15
+
+         # Boost documentation files
+         if any(pattern in file_path for pattern in ['docs/', 'documentation/']):
+             if any(word in query_lower for word in ['how', 'guide', 'tutorial', 'example']):
+                 boost += 0.1
+
+         # Penalize non-code files for code-specific questions
+         if any(word in query_lower for word in ['function', 'class', 'implement', 'code']):
+             if file_path.endswith(('.md', '.txt', '.json', '.yaml', '.yml')):
+                 boost -= 0.1
+
+         return boost
+
+     def update_weights(self, **kwargs):
+         """
+         Update scoring weights.
+
+         Args:
+             **kwargs: New weights (similarity, file_proximity, dependency, recency, popularity)
+         """
+         for key, value in kwargs.items():
+             if key in self.weights:
+                 self.weights[key] = value
+
+         # Normalize weights to sum to 1.0
+         total = sum(self.weights.values())
+         if total > 0:
+             for key in self.weights:
+                 self.weights[key] /= total
+
+     def get_weights(self) -> Dict[str, float]:
+         """Get current scoring weights."""
+         return self.weights.copy()
+
+     def explain_ranking(self, chunk: Dict, query: str,
+                         current_file: Optional[str] = None,
+                         language: Optional[str] = None) -> Dict:
+         """
+         Explain the ranking score for a specific chunk.
+
+         Returns:
+             Dict with a breakdown of score components
+         """
+         similarity = chunk.get('similarity', chunk.get('vector_score', 0.0))
+
+         explanation = {
+             'total_score': chunk.get('rank_score', 0.0),
+             'components': {
+                 'similarity': similarity * self.weights['similarity'],
+                 'file_proximity': 0.0,
+                 'dependency': 0.0,
+                 'recency': 0.0,
+                 'popularity': 0.0,
+                 'language_boost': 0.0
+             }
+         }
+
+         if current_file and 'file_path' in chunk:
+             prox = self._file_proximity_score(chunk['file_path'], current_file)
+             explanation['components']['file_proximity'] = prox * self.weights['file_proximity']
+
+         if current_file and self.dependency_graph and 'file_path' in chunk:
+             dep = self._dependency_score(chunk['file_path'], current_file)
+             explanation['components']['dependency'] = dep * self.weights['dependency']
+
+         if 'last_modified' in chunk:
+             rec = self._recency_score(chunk['last_modified'])
+             explanation['components']['recency'] = rec * self.weights['recency']
+
+         if 'reference_count' in chunk:
+             pop = self._popularity_score(chunk['reference_count'])
+             explanation['components']['popularity'] = pop * self.weights['popularity']
+
+         if language:
+             boost = self._language_specific_boost(chunk, query, language)
+             explanation['components']['language_boost'] = boost
+
+         return explanation
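
To see how the weighted factors combine in practice, here is a minimal usage sketch of ContextRanker. It is not part of the package: the chunk dicts, file paths, and the SimpleDepGraph stand-in are illustrative assumptions, and the only interface the ranker requires of a dependency graph is the get_distance(current_file, file_path) method called in _dependency_score above.

from datetime import datetime, timedelta
from coding_assistant.context.ranker import ContextRanker

class SimpleDepGraph:
    # Hypothetical stand-in: get_distance() is the only method the ranker calls
    def get_distance(self, src, dst):
        return 1  # treat every file as a direct dependency

chunks = [
    {'id': 'a', 'similarity': 0.90, 'file_path': 'docs/usage.md'},
    {'id': 'b', 'similarity': 0.75, 'file_path': 'src/auth/login.py',
     'last_modified': datetime.now() - timedelta(hours=3),
     'reference_count': 12},
]

ranker = ContextRanker(dependency_graph=SimpleDepGraph())
query = "how is the login function implemented"
ranked = ranker.rank(chunks, query, current_file='src/auth/session.py',
                     language='python')

# Chunk 'b' should outrank 'a' despite its lower similarity: same-directory
# proximity (0.8 * 0.2), dependency distance (0.8 * 0.2), recency (1.0 * 0.1)
# and popularity (~0.37 * 0.1) add up, roughly 0.76 against 0.52 for 'a'.
for c in ranked:
    print(c['id'], round(c['rank_score'], 3))

# Break the top score into its components
print(ranker.explain_ranking(ranked[0], query,
                             current_file='src/auth/session.py',
                             language='python'))

Note that update_weights() renormalizes the weights to sum to 1.0, so a call like ranker.update_weights(similarity=0.8) shifts relative emphasis rather than setting an absolute 0.8 weight.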
coding_assistant/context/retriever.py
@@ -0,0 +1,119 @@
+ """Semantic retriever for code search using embeddings."""
+ from pathlib import Path
+ from typing import List, Dict, Optional
+ from coding_assistant.codebase.crawler import CodebaseCrawler
+ from coding_assistant.codebase.parser import CodeParser
+ from coding_assistant.embeddings.generator import EmbeddingGenerator
+ from coding_assistant.embeddings.store import VectorStore
+
+
+ class SemanticRetriever:
+     """Retrieve relevant code using semantic search."""
+
+     def __init__(self, project_path: Path):
+         """
+         Initialize the semantic retriever.
+
+         Args:
+             project_path: Path to the project root
+         """
+         self.project_path = Path(project_path).resolve()
+         self.crawler = CodebaseCrawler(self.project_path)
+         self.parser = CodeParser()
+         self.embedder = EmbeddingGenerator()
+         self.store = VectorStore(persist_dir=self.project_path / ".coding_assistant" / "chroma_db")
+
+     def clear_index(self):
+         """Clear the existing index."""
+         self.store.clear()
+
+     def index_codebase(self, max_files: int = 100):
+         """
+         Index the codebase for semantic search.
+
+         Args:
+             max_files: Maximum number of files to index
+         """
+         # Scan files
+         files = self.crawler.scan(max_files=max_files)
+
+         all_chunks = []
+
+         # Parse each file
+         for file_info in files:
+             try:
+                 content = self.crawler.read_file(file_info['path'])
+
+                 # Only parse Python files for now (the parser supports Python)
+                 if file_info['extension'] == '.py':
+                     parsed = self.parser.parse_file(file_info['path'], content)
+                     all_chunks.extend(parsed['chunks'])
+                 else:
+                     # For non-Python files, create a simple file-level chunk
+                     all_chunks.append({
+                         'type': 'file',
+                         'file_path': file_info['path'],
+                         'content': content[:5000],  # Limit content size
+                         'start_line': 0,
+                         'end_line': len(content.split('\n'))
+                     })
+             except Exception as e:
+                 # Skip files that can't be read or parsed
+                 if hasattr(self, '_verbose') and self._verbose:
+                     print(f"Warning: Could not parse {file_info['path']}: {e}")
+                 continue
+
+         if not all_chunks:
+             print("No chunks to index")
+             return
+
+         # Generate embeddings
+         embedded_chunks = self.embedder.embed_code_chunks(all_chunks)
+
+         # Store in the vector database
+         self.store.add_chunks(embedded_chunks)
+
+     def get_stats(self) -> Dict:
+         """
+         Get statistics about the indexed codebase.
+
+         Returns:
+             Dictionary with stats including total_chunks and embedding_dimension
+         """
+         return {
+             'total_chunks': self.store.count(),
+             'embedding_dimension': self.embedder.dimension
+         }
+
+     def retrieve(self, query: str, k: int = 5) -> List[Dict]:
+         """
+         Retrieve relevant code chunks for a query.
+
+         Args:
+             query: User query/question
+             k: Number of results to return
+
+         Returns:
+             List of relevant chunks with metadata
+         """
+         # Generate the query embedding
+         query_embedding = self.embedder.generate_embedding(query)
+
+         # Search the vector store
+         results = self.store.search(query_embedding, n_results=k)
+
+         # Format results to match the expected structure
+         formatted_results = []
+         for result in results:
+             metadata = result['metadata']
+             formatted_results.append({
+                 'path': metadata['file_path'],
+                 'type': metadata['type'],
+                 'similarity': result['similarity'],
+                 'start_line': metadata['start_line'],
+                 'end_line': metadata['end_line'],
+                 'content': result['content']
+             })
+
+         return formatted_results
+
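
Likewise, a minimal index-then-query sketch for SemanticRetriever, again not part of the package. The project path is a placeholder, and the printed keys follow the formatted-result structure that retrieve() builds above.

from pathlib import Path
from coding_assistant.context.retriever import SemanticRetriever

retriever = SemanticRetriever(Path("/path/to/project"))  # placeholder path

retriever.clear_index()                  # drop any stale index first
retriever.index_codebase(max_files=100)  # scan, parse, embed, store
print(retriever.get_stats())             # {'total_chunks': ..., 'embedding_dimension': ...}

for hit in retriever.retrieve("where is authentication handled?", k=5):
    print(f"{hit['similarity']:.2f}  {hit['path']}:{hit['start_line']}-{hit['end_line']}")

Per the VectorStore constructor above, the index persists to .coding_assistant/chroma_db under the project root, so repeated runs reuse the same store unless clear_index() is called.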