cognify-code 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
+ """Context Analyzer - Analyzes file dependencies and relationships."""
2
+
3
+ import ast
4
+ import re
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Set, Tuple
8
+
9
+
10
@dataclass
class FileContext:
    """Per-file analysis record: raw content plus extracted metadata."""
    path: str
    content: str
    language: str
    imports: List[str] = field(default_factory=list)
    exports: List[str] = field(default_factory=list)  # Functions, classes defined
    dependencies: List[str] = field(default_factory=list)  # Files this imports from
    dependents: List[str] = field(default_factory=list)  # Files that import this
    symbols: List[str] = field(default_factory=list)  # All symbols (funcs, classes, vars)

    @property
    def token_estimate(self) -> int:
        """Rough token count, assuming ~4 characters per token."""
        return len(self.content) // 4

    def get_summary(self) -> str:
        """One-line description: path, line count, and language."""
        # content.count('\n') + 1 equals len(content.split('\n')) for any string.
        line_count = self.content.count('\n') + 1
        return f"{self.path} ({line_count} lines, {self.language})"
31
+
32
+
33
@dataclass
class DependencyGraph:
    """Directed graph of import relationships between project files."""
    nodes: Dict[str, FileContext] = field(default_factory=dict)  # path -> analyzed context
    edges: Dict[str, Set[str]] = field(default_factory=dict)  # file -> imports
    reverse_edges: Dict[str, Set[str]] = field(default_factory=dict)  # file -> imported_by

    def add_file(self, file_ctx: FileContext) -> None:
        """Register *file_ctx* as a node, creating empty edge sets if needed."""
        self.nodes[file_ctx.path] = file_ctx
        self.edges.setdefault(file_ctx.path, set())
        self.reverse_edges.setdefault(file_ctx.path, set())

    def add_dependency(self, from_file: str, to_file: str) -> None:
        """Record that *from_file* imports *to_file*, indexed both ways."""
        self.edges.setdefault(from_file, set()).add(to_file)
        self.reverse_edges.setdefault(to_file, set()).add(from_file)

    def get_related_files(self, file_path: str, depth: int = 2) -> Set[str]:
        """Collect files within *depth* hops of *file_path* in either direction.

        Breadth-first traversal; the starting file itself is excluded.
        """
        related: Set[str] = set()
        visited: Set[str] = set()
        queue = [(file_path, 0)]
        cursor = 0  # index cursor instead of pop(0); same FIFO order

        while cursor < len(queue):
            current, dist = queue[cursor]
            cursor += 1
            if current in visited or dist > depth:
                continue

            visited.add(current)
            if current != file_path:
                related.add(current)

            # Neighbors in both directions: files it imports and its importers.
            neighbors = self.edges.get(current, set()) | self.reverse_edges.get(current, set())
            for neighbor in neighbors:
                if neighbor not in visited:
                    queue.append((neighbor, dist + 1))

        return related
83
+
84
+
85
class ContextAnalyzer:
    """Analyzes codebase for context and dependencies.

    Parses source files (Python via ``ast``; JS/TS and Go via regexes) to
    extract imports and defined symbols, and builds a dependency graph
    mapping which files import which.
    """

    # File extension -> language name, used to pick a parsing strategy.
    LANGUAGE_MAP = {
        '.py': 'python',
        '.js': 'javascript',
        '.ts': 'typescript',
        '.jsx': 'javascript',
        '.tsx': 'typescript',
        '.java': 'java',
        '.go': 'go',
        '.rs': 'rust',
        '.rb': 'ruby',
        '.php': 'php',
        '.c': 'c',
        '.cpp': 'cpp',
        '.h': 'c',
        '.hpp': 'cpp',
    }

    # Directory names skipped when scanning the project tree.
    IGNORE_DIRS = {
        '.git', '__pycache__', 'node_modules', '.venv', 'venv',
        'env', 'dist', 'build', '.pytest_cache', '.mypy_cache',
        '.tox', 'htmlcov', '.eggs', '.idea', '.vscode',
    }

    def __init__(self, root_path: Optional[Path] = None):
        """Create an analyzer rooted at *root_path* (defaults to the CWD)."""
        self.root_path = Path(root_path or Path.cwd()).resolve()
        self._graph: Optional[DependencyGraph] = None  # cache for FULL-scan graphs only
        self._file_cache: Dict[str, FileContext] = {}  # path -> parsed context

    def analyze_file(self, file_path: str) -> Optional[FileContext]:
        """Analyze a single file for context.

        Returns None when the file is missing or unreadable. Results are
        cached per path for the lifetime of the analyzer.
        """
        if file_path in self._file_cache:
            return self._file_cache[file_path]

        full_path = self._resolve_path(file_path)
        if not full_path.exists():
            return None

        try:
            content = full_path.read_text(encoding='utf-8', errors='ignore')
        except Exception:
            # Unreadable (permissions, special FS entry): treat as absent.
            return None

        language = self.LANGUAGE_MAP.get(full_path.suffix.lower(), 'text')

        ctx = FileContext(
            path=file_path,
            content=content,
            language=language,
        )

        # Language-specific extraction of imports/exports/symbols.
        if language == 'python':
            self._analyze_python(ctx)
        elif language in ('javascript', 'typescript'):
            self._analyze_javascript(ctx)
        elif language == 'go':
            self._analyze_go(ctx)

        self._file_cache[file_path] = ctx
        return ctx

    def _analyze_python(self, ctx: FileContext) -> None:
        """Analyze Python file for imports and symbols.

        Imports and ``symbols`` are collected from the whole tree (nested
        definitions included, matching the field's "All symbols" contract).
        ``exports`` lists only module-top-level definitions — the previous
        ``ast.walk`` pass also picked up nested functions and methods,
        contradicting the documented "top-level definitions" meaning.
        """
        try:
            tree = ast.parse(ctx.content)
        except SyntaxError:
            # Unparsable source: leave the context with empty metadata.
            return

        for node in ast.walk(tree):
            # Imports (anywhere in the file, including function-local ones)
            if isinstance(node, ast.Import):
                for alias in node.names:
                    ctx.imports.append(alias.name)
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    ctx.imports.append(node.module)

            # Symbols: every definition, nested or not
            elif isinstance(node, ast.FunctionDef):
                ctx.symbols.append(f"function:{node.name}")
            elif isinstance(node, ast.AsyncFunctionDef):
                ctx.symbols.append(f"async_function:{node.name}")
            elif isinstance(node, ast.ClassDef):
                ctx.symbols.append(f"class:{node.name}")

        # Exports: only definitions at module top level (the file's API).
        for node in tree.body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                ctx.exports.append(node.name)

    def _analyze_javascript(self, ctx: FileContext) -> None:
        """Analyze JavaScript/TypeScript for imports and exports via regex."""
        # import ... from '...'
        import_pattern = r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]"
        # require('...')
        require_pattern = r"require\s*\(\s*['\"]([^'\"]+)['\"]\s*\)"
        # export function/class/const
        export_pattern = r"export\s+(?:default\s+)?(?:function|class|const|let|var)\s+(\w+)"

        for match in re.finditer(import_pattern, ctx.content):
            ctx.imports.append(match.group(1))

        for match in re.finditer(require_pattern, ctx.content):
            ctx.imports.append(match.group(1))

        for match in re.finditer(export_pattern, ctx.content):
            ctx.exports.append(match.group(1))
            ctx.symbols.append(f"export:{match.group(1)}")

    def _analyze_go(self, ctx: FileContext) -> None:
        """Analyze Go file for imports and function definitions via regex."""
        # import "..." or import (...)
        single_import = r'import\s+"([^"]+)"'
        multi_import = r'import\s+\(([\s\S]*?)\)'

        for match in re.finditer(single_import, ctx.content):
            ctx.imports.append(match.group(1))

        for match in re.finditer(multi_import, ctx.content):
            block = match.group(1)
            for line in block.split('\n'):
                line = line.strip()
                if line.startswith('"') and line.endswith('"'):
                    ctx.imports.append(line[1:-1])

        # func Name(...) — optional receiver group covers methods too
        func_pattern = r'func\s+(?:\([^)]+\)\s+)?(\w+)\s*\('
        for match in re.finditer(func_pattern, ctx.content):
            ctx.exports.append(match.group(1))
            ctx.symbols.append(f"func:{match.group(1)}")

    def build_dependency_graph(self, files: Optional[List[str]] = None) -> DependencyGraph:
        """Build a dependency graph for the codebase.

        With ``files=None`` the whole project is scanned and the result is
        cached; an explicit file list always builds a fresh, uncached graph.
        (Previously a subset build overwrote the cache, so later no-arg
        calls silently returned a partial graph.)
        """
        full_scan = files is None

        if self._graph is not None and full_scan:
            return self._graph

        graph = DependencyGraph()

        if full_scan:
            files = self._get_all_code_files()

        # Analyze each file
        for file_path in files:
            ctx = self.analyze_file(file_path)
            if ctx:
                graph.add_file(ctx)

        # Resolve dependencies. FileContext objects are cached across
        # builds, so guard against appending duplicates on a rebuild.
        for file_path, ctx in graph.nodes.items():
            for imp in ctx.imports:
                resolved = self._resolve_import(file_path, imp)
                if resolved and resolved in graph.nodes:
                    graph.add_dependency(file_path, resolved)
                    if resolved not in ctx.dependencies:
                        ctx.dependencies.append(resolved)
                    if file_path not in graph.nodes[resolved].dependents:
                        graph.nodes[resolved].dependents.append(file_path)

        # Cache only full-project graphs so subset builds can't shadow them.
        if full_scan:
            self._graph = graph
        return graph

    def _resolve_import(self, from_file: str, import_path: str) -> Optional[str]:
        """Resolve an import string to a project-relative file path, or None.

        Handles Python-style relative imports (leading dots) and absolute
        module paths tried against the project root and ``src/``.
        """
        from_path = Path(from_file)

        # Python relative imports
        if import_path.startswith('.'):
            parts = import_path.split('.')
            # Each leading empty part corresponds to one level: '.' -> same
            # package, '..' -> parent, etc.
            up_levels = len([p for p in parts if p == ''])
            module_parts = [p for p in parts if p]

            base = from_path.parent
            for _ in range(up_levels - 1):
                base = base.parent

            for part in module_parts:
                base = base / part

            # Try module file, then package __init__
            candidates = [
                str(base) + '.py',
                str(base / '__init__.py'),
            ]
        else:
            # Absolute import - try to find in project
            parts = import_path.split('.')
            candidates = [
                '/'.join(parts) + '.py',
                '/'.join(parts) + '/__init__.py',
                'src/' + '/'.join(parts) + '.py',
            ]

        for candidate in candidates:
            if self._resolve_path(candidate).exists():
                return candidate

        return None

    def _get_all_code_files(self) -> List[str]:
        """Get all code files in the project, as root-relative paths."""
        files = []

        for path in self.root_path.rglob('*'):
            if path.is_file() and path.suffix.lower() in self.LANGUAGE_MAP:
                # Skip anything under an ignored directory
                if any(part in self.IGNORE_DIRS for part in path.parts):
                    continue

                try:
                    rel_path = str(path.relative_to(self.root_path))
                    files.append(rel_path)
                except ValueError:
                    # Not under root (e.g. symlink escape): skip silently.
                    pass

        return files

    def _resolve_path(self, path: str) -> Path:
        """Resolve a path relative to the project root (absolute passes through)."""
        p = Path(path)
        if p.is_absolute():
            return p
        return self.root_path / path

    def find_related_files(self, file_path: str, max_files: int = 10) -> List[str]:
        """Find up to *max_files* files related to *file_path*.

        Direct dependencies/dependents sort first; ties break by path.
        """
        graph = self.build_dependency_graph()
        related = graph.get_related_files(file_path, depth=2)

        # Direct neighbors (either direction) are the most relevant.
        direct_deps = set(graph.edges.get(file_path, set()))
        direct_deps.update(graph.reverse_edges.get(file_path, set()))

        sorted_related = sorted(
            related,
            key=lambda f: (f not in direct_deps, f)
        )

        return sorted_related[:max_files]

    def find_files_for_query(self, query: str, max_files: int = 5) -> List[str]:
        """Find files relevant to a natural language query.

        Simple keyword scoring: filename hits weigh 3.0, symbol hits 2.0,
        and content hits (keywords longer than 3 chars) 0.5 each.
        """
        query_lower = query.lower()
        scores: Dict[str, float] = {}

        # Keywords to look for
        keywords = set(re.findall(r'\b\w+\b', query_lower))

        for file_path in self._get_all_code_files():
            ctx = self.analyze_file(file_path)
            if not ctx:
                continue

            score = 0.0

            # Check file name
            file_name = Path(file_path).stem.lower()
            for kw in keywords:
                if kw in file_name:
                    score += 3.0

            # Check symbols
            for symbol in ctx.symbols:
                symbol_lower = symbol.lower()
                for kw in keywords:
                    if kw in symbol_lower:
                        score += 2.0

            # Check content (less weight; short keywords are too noisy)
            content_lower = ctx.content.lower()
            for kw in keywords:
                if len(kw) > 3 and kw in content_lower:
                    score += 0.5

            if score > 0:
                scores[file_path] = score

        # Sort by score, best first
        sorted_files = sorted(scores.keys(), key=lambda f: scores[f], reverse=True)
        return sorted_files[:max_files]
@@ -0,0 +1,309 @@
1
+ """Context Selector - Intelligently selects relevant context for AI prompts."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional, Set, Tuple
6
+
7
+ from .analyzer import ContextAnalyzer, FileContext
8
+
9
+
10
@dataclass
class ContextConfig:
    """Tunable limits and behavior flags for context selection."""
    max_tokens: int = 8000           # hard cap on estimated context tokens
    max_files: int = 10              # hard cap on number of included files
    include_imports: bool = True     # pull in files the target imports
    include_dependents: bool = True  # pull in files that import the target
    depth: int = 2                   # dependency-graph traversal depth
    prioritize_recent: bool = True   # favor recently modified files
19
+
20
+
21
@dataclass
class ContextResult:
    """Outcome of a context-selection pass."""
    files: List[FileContext] = field(default_factory=list)  # selected contexts, priority order
    total_tokens: int = 0    # estimated token total of included files
    truncated: bool = False  # True when budget/file limits cut selection short
    summary: str = ""        # human-readable description of the selection

    def format_for_prompt(self, include_summary: bool = True) -> str:
        """Render the selection as markdown sections for an LLM prompt."""
        sections: List[str] = []

        if include_summary and self.summary:
            sections.append(f"## Project Context\n{self.summary}\n")

        sections.append("## Relevant Code Files\n")

        for file_ctx in self.files:
            sections.extend((
                f"### {file_ctx.path}",
                f"```{file_ctx.language}",
                file_ctx.content,
                "```\n",
            ))

        return "\n".join(sections)

    def format_compact(self) -> str:
        """Render the selection as plain ``=== path ===`` delimited text."""
        chunks: List[str] = []
        for file_ctx in self.files:
            chunks.extend((f"=== {file_ctx.path} ===", file_ctx.content, ""))
        return "\n".join(chunks)

    def get_file_list(self) -> List[str]:
        """Return the paths of the included files, in selection order."""
        return [file_ctx.path for file_ctx in self.files]
60
+
61
+
62
class ContextSelector:
    """Selects relevant context for AI operations.

    Wraps a ContextAnalyzer and packs files into a ContextResult under the
    token/file budgets configured in ContextConfig.
    """

    def __init__(
        self,
        root_path: Optional[Path] = None,
        config: Optional[ContextConfig] = None,
    ):
        """Set up the selector with an analyzer rooted at *root_path*."""
        self.root_path = Path(root_path or Path.cwd()).resolve()
        self.config = config or ContextConfig()
        self.analyzer = ContextAnalyzer(self.root_path)

    def select_for_file(
        self,
        file_path: str,
        query: Optional[str] = None,
    ) -> ContextResult:
        """Select context relevant to a specific file.

        The target file is always included first; dependency-graph
        neighbors follow, subject to the token/file budget. *query* is
        accepted for interface symmetry but not currently used.
        """
        outcome = ContextResult()
        picked: List[FileContext] = []
        used_tokens = 0

        # The target file itself always comes first.
        primary = self.analyzer.analyze_file(file_path)
        if primary:
            picked.append(primary)
            used_tokens += primary.token_estimate

        # Neighbors from the dependency graph, most relevant first.
        neighbors = self.analyzer.find_related_files(
            file_path,
            max_files=self.config.max_files,
        )

        for neighbor_path in neighbors:
            if len(picked) >= self.config.max_files:
                outcome.truncated = True
                break

            neighbor_ctx = self.analyzer.analyze_file(neighbor_path)
            if neighbor_ctx is None:
                continue

            if used_tokens + neighbor_ctx.token_estimate > self.config.max_tokens:
                # Over budget: skip this one but keep trying smaller files.
                outcome.truncated = True
                continue

            picked.append(neighbor_ctx)
            used_tokens += neighbor_ctx.token_estimate

        outcome.files = picked
        outcome.total_tokens = used_tokens
        outcome.summary = self._generate_summary(picked)

        return outcome

    def select_for_query(
        self,
        query: str,
        target_file: Optional[str] = None,
    ) -> ContextResult:
        """Select context relevant to a natural language query.

        Query-matched files fill the budget; when *target_file* is given it
        is included first and its graph neighbors round out the selection.
        """
        outcome = ContextResult()
        picked: List[FileContext] = []
        used_tokens = 0
        included: Set[str] = set()

        # A caller-pinned target file is always considered first.
        if target_file:
            primary = self.analyzer.analyze_file(target_file)
            if primary:
                picked.append(primary)
                used_tokens += primary.token_estimate
                included.add(target_file)

        # Over-fetch matches so budget filtering can still fill the list.
        candidates = self.analyzer.find_files_for_query(
            query,
            max_files=self.config.max_files * 2,
        )

        for candidate_path in candidates:
            if candidate_path in included:
                continue

            if len(picked) >= self.config.max_files:
                outcome.truncated = True
                break

            candidate_ctx = self.analyzer.analyze_file(candidate_path)
            if candidate_ctx is None:
                continue

            if used_tokens + candidate_ctx.token_estimate > self.config.max_tokens:
                outcome.truncated = True
                continue

            picked.append(candidate_ctx)
            used_tokens += candidate_ctx.token_estimate
            included.add(candidate_path)

        # Round out the selection with the target file's graph neighbors.
        if target_file and self.config.include_imports:
            for neighbor_path in self.analyzer.find_related_files(target_file, max_files=5):
                if neighbor_path in included:
                    continue

                if len(picked) >= self.config.max_files:
                    break

                neighbor_ctx = self.analyzer.analyze_file(neighbor_path)
                if neighbor_ctx is None:
                    continue

                if used_tokens + neighbor_ctx.token_estimate > self.config.max_tokens:
                    continue

                picked.append(neighbor_ctx)
                used_tokens += neighbor_ctx.token_estimate
                included.add(neighbor_path)

        outcome.files = picked
        outcome.total_tokens = used_tokens
        outcome.summary = self._generate_summary(picked)

        return outcome

    def select_explicit(
        self,
        file_paths: List[str],
        include_dependencies: bool = False,
    ) -> ContextResult:
        """Select context from explicitly specified files.

        The caller's files go in first (token budget only); with
        *include_dependencies* a few graph neighbors of each are appended
        under both the token and file limits.
        """
        outcome = ContextResult()
        picked: List[FileContext] = []
        used_tokens = 0
        included: Set[str] = set()

        for requested in file_paths:
            if requested in included:
                continue

            requested_ctx = self.analyzer.analyze_file(requested)
            if requested_ctx is None:
                continue

            if used_tokens + requested_ctx.token_estimate > self.config.max_tokens:
                outcome.truncated = True
                continue

            picked.append(requested_ctx)
            used_tokens += requested_ctx.token_estimate
            included.add(requested)

        if include_dependencies:
            for requested in file_paths:
                for neighbor_path in self.analyzer.find_related_files(requested, max_files=3):
                    if neighbor_path in included:
                        continue

                    if len(picked) >= self.config.max_files:
                        outcome.truncated = True
                        break

                    neighbor_ctx = self.analyzer.analyze_file(neighbor_path)
                    if neighbor_ctx is None:
                        continue

                    if used_tokens + neighbor_ctx.token_estimate > self.config.max_tokens:
                        continue

                    picked.append(neighbor_ctx)
                    used_tokens += neighbor_ctx.token_estimate
                    included.add(neighbor_path)

        outcome.files = picked
        outcome.total_tokens = used_tokens
        outcome.summary = self._generate_summary(picked)

        return outcome

    def _generate_summary(self, files: List[FileContext]) -> str:
        """Build a short human-readable description of the selection."""
        if not files:
            return "No context files selected."

        languages = {f.language for f in files}
        total_lines = sum(len(f.content.split('\n')) for f in files)

        lines = [
            f"Selected {len(files)} files ({total_lines} lines)",
            f"Languages: {', '.join(sorted(languages))}",
            "Files:",
        ]

        # Only the first five files are itemized to keep the summary short.
        for file_ctx in files[:5]:
            deps = len(file_ctx.dependencies)
            exports = len(file_ctx.exports)
            lines.append(f"  - {file_ctx.path} ({exports} exports, {deps} deps)")

        if len(files) > 5:
            lines.append(f"  ... and {len(files) - 5} more files")

        return "\n".join(lines)

    def get_auto_context(
        self,
        query: str,
        target_files: Optional[List[str]] = None,
    ) -> ContextResult:
        """Automatically select the best context for a query.

        This is the main entry point for smart context selection: explicit
        files (plus their dependencies) are merged with query-matched
        files, all under the configured budget.
        """
        if not target_files:
            # Nothing pinned by the caller: pure query-driven selection.
            return self.select_for_query(query)

        # Explicit files (and their dependencies) form the primary context.
        merged = self.select_explicit(target_files, include_dependencies=True)

        # Also search for query-relevant files.
        from_query = self.select_for_query(query)

        # Merge, skipping duplicates and anything that busts the budget.
        already = {f.path for f in merged.files}
        budget_left = self.config.max_tokens - merged.total_tokens

        for candidate in from_query.files:
            if candidate.path in already:
                continue
            if candidate.token_estimate > budget_left:
                continue
            if len(merged.files) >= self.config.max_files:
                break

            merged.files.append(candidate)
            merged.total_tokens += candidate.token_estimate
            budget_left -= candidate.token_estimate

        merged.summary = self._generate_summary(merged.files)
        return merged