codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,377 @@
1
+ """High-level graph query engine — translates task descriptions into graph traversals.
2
+
3
+ This module bridges the gap between "what is the agent working on?" and
4
+ "what graph nodes/edges are relevant?" It's the intelligence layer above
5
+ raw graph traversal.
6
+
7
+ Edge cases:
8
+ - Task mentions files that don't exist in the graph (new files): return empty + warning
9
+ - Task is too vague ("improve performance"): return high-level module structure
10
+ - Task mentions multiple unrelated areas: union of relevant subgraphs
11
+ - Task mentions external dependencies: include the import edges but not
12
+ the external code itself
13
+ - Conflicting relevance signals: same node relevant for multiple reasons
14
+ with different priorities — take the highest
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import TYPE_CHECKING
23
+
24
+ from codebase_intel.core.types import (
25
+ ContextPriority,
26
+ EdgeKind,
27
+ GraphEdge,
28
+ GraphNode,
29
+ Language,
30
+ NodeKind,
31
+ )
32
+
33
+ if TYPE_CHECKING:
34
+ from codebase_intel.graph.storage import GraphStorage
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ @dataclass
40
+ class RelevanceResult:
41
+ """Result of a relevance query — nodes with priorities and explanations."""
42
+
43
+ nodes: list[GraphNode] = field(default_factory=list)
44
+ edges: list[GraphEdge] = field(default_factory=list)
45
+ priorities: dict[str, ContextPriority] = field(default_factory=dict)
46
+ explanations: dict[str, str] = field(default_factory=dict)
47
+ warnings: list[str] = field(default_factory=list)
48
+ truncated: bool = False
49
+
50
+ def nodes_by_priority(self) -> dict[ContextPriority, list[GraphNode]]:
51
+ """Group nodes by their assigned priority."""
52
+ grouped: dict[ContextPriority, list[GraphNode]] = {
53
+ p: [] for p in ContextPriority
54
+ }
55
+ for node in self.nodes:
56
+ priority = self.priorities.get(node.node_id, ContextPriority.LOW)
57
+ grouped[priority].append(node)
58
+ return grouped
59
+
60
+ @property
61
+ def unique_files(self) -> set[Path]:
62
+ """All unique file paths referenced by result nodes."""
63
+ return {n.file_path for n in self.nodes}
64
+
65
+
66
+ class GraphQueryEngine:
67
+ """Translates high-level queries into graph traversals.
68
+
69
+ The engine supports several query modes:
70
+ 1. File-based: "what's relevant to these files?"
71
+ 2. Symbol-based: "what's relevant to this function/class?"
72
+ 3. Impact-based: "what's affected by changes to X?"
73
+ 4. Scope-based: "show me everything in this directory/module"
74
+ """
75
+
76
+ def __init__(self, storage: GraphStorage, max_result_nodes: int = 200) -> None:
77
+ self._storage = storage
78
+ self._max_result_nodes = max_result_nodes
79
+
80
+ async def query_by_files(
81
+ self,
82
+ file_paths: list[Path],
83
+ include_depth: int = 2,
84
+ ) -> RelevanceResult:
85
+ """Find relevant context for a set of files.
86
+
87
+ Priority assignment:
88
+ - CRITICAL: the files themselves
89
+ - HIGH: direct imports/dependencies of those files
90
+ - MEDIUM: transitive dependencies (depth 2)
91
+ - LOW: test files that test these files
92
+
93
+ Edge cases:
94
+ - File not in graph: might be new. Add warning, no nodes.
95
+ - File is a barrel/index: has many dependents. Cap and prioritize
96
+ by which dependents are most coupled (most edges).
97
+ - File is in node_modules: skip deep traversal (it's external)
98
+ """
99
+ result = RelevanceResult()
100
+ seen_ids: set[str] = set()
101
+
102
+ for fp in file_paths:
103
+ file_nodes = await self._storage.get_nodes_by_file(fp)
104
+ if not file_nodes:
105
+ result.warnings.append(
106
+ f"File {fp} not found in graph (new file or not yet indexed)"
107
+ )
108
+ continue
109
+
110
+ for node in file_nodes:
111
+ if node.node_id not in seen_ids:
112
+ seen_ids.add(node.node_id)
113
+ result.nodes.append(node)
114
+ result.priorities[node.node_id] = ContextPriority.CRITICAL
115
+ result.explanations[node.node_id] = "Directly referenced file"
116
+
117
+ # Gather dependencies (what these files import)
118
+ for node in file_nodes:
119
+ if node.kind == NodeKind.MODULE:
120
+ deps = await self._storage.get_dependencies(
121
+ node.node_id,
122
+ max_depth=include_depth,
123
+ )
124
+ for dep in deps:
125
+ if dep.node_id in seen_ids:
126
+ continue
127
+ if dep.is_external:
128
+ continue # Don't include external dependency internals
129
+ if len(result.nodes) >= self._max_result_nodes:
130
+ result.truncated = True
131
+ break
132
+
133
+ seen_ids.add(dep.node_id)
134
+ result.nodes.append(dep)
135
+
136
+ # Depth-based priority
137
+ priority = ContextPriority.HIGH
138
+ explanation = f"Direct dependency of {fp.name}"
139
+ # Check if it's a transitive dep (depth > 1)
140
+ direct_deps = await self._storage.get_dependencies(
141
+ node.node_id, max_depth=1
142
+ )
143
+ direct_ids = {d.node_id for d in direct_deps}
144
+ if dep.node_id not in direct_ids:
145
+ priority = ContextPriority.MEDIUM
146
+ explanation = f"Transitive dependency of {fp.name}"
147
+
148
+ result.priorities[dep.node_id] = priority
149
+ result.explanations[dep.node_id] = explanation
150
+
151
+ # Find test files that test these files
152
+ for node in file_nodes:
153
+ if node.kind == NodeKind.MODULE:
154
+ test_files = await self._find_test_files(node)
155
+ for test_node in test_files:
156
+ if test_node.node_id not in seen_ids:
157
+ seen_ids.add(test_node.node_id)
158
+ result.nodes.append(test_node)
159
+ result.priorities[test_node.node_id] = ContextPriority.LOW
160
+ result.explanations[test_node.node_id] = (
161
+ f"Test file for {fp.name}"
162
+ )
163
+
164
+ return result
165
+
166
+ async def query_by_symbol(
167
+ self,
168
+ symbol_name: str,
169
+ include_depth: int = 2,
170
+ ) -> RelevanceResult:
171
+ """Find relevant context for a specific symbol (function, class, etc.).
172
+
173
+ Edge cases:
174
+ - Symbol name is ambiguous (exists in multiple files): return all
175
+ matches with file path in the explanation
176
+ - Symbol is a common name ("get", "create", "handle"): may match
177
+ many nodes. Prioritize by: same directory > same package > global
178
+ - Symbol doesn't exist: might be a typo, suggest similar names
179
+ """
180
+ result = RelevanceResult()
181
+ candidates = await self._search_symbol(symbol_name)
182
+
183
+ if not candidates:
184
+ result.warnings.append(
185
+ f"Symbol '{symbol_name}' not found in graph. "
186
+ f"It may be new, in an unparsed file, or misspelled."
187
+ )
188
+ return result
189
+
190
+ seen_ids: set[str] = set()
191
+
192
+ for node in candidates:
193
+ seen_ids.add(node.node_id)
194
+ result.nodes.append(node)
195
+ result.priorities[node.node_id] = ContextPriority.CRITICAL
196
+ result.explanations[node.node_id] = f"Matched symbol '{symbol_name}'"
197
+
198
+ # Get dependencies and dependents
199
+ deps = await self._storage.get_dependencies(
200
+ node.node_id, max_depth=include_depth
201
+ )
202
+ dependents = await self._storage.get_dependents(
203
+ node.node_id, max_depth=1
204
+ )
205
+
206
+ for dep in deps:
207
+ if dep.node_id not in seen_ids and not dep.is_external:
208
+ seen_ids.add(dep.node_id)
209
+ result.nodes.append(dep)
210
+ result.priorities[dep.node_id] = ContextPriority.HIGH
211
+ result.explanations[dep.node_id] = (
212
+ f"Dependency of {symbol_name}"
213
+ )
214
+
215
+ for dep in dependents:
216
+ if dep.node_id not in seen_ids and not dep.is_external:
217
+ if len(result.nodes) >= self._max_result_nodes:
218
+ result.truncated = True
219
+ break
220
+ seen_ids.add(dep.node_id)
221
+ result.nodes.append(dep)
222
+ result.priorities[dep.node_id] = ContextPriority.MEDIUM
223
+ result.explanations[dep.node_id] = (
224
+ f"Depends on {symbol_name} (may be affected by changes)"
225
+ )
226
+
227
+ return result
228
+
229
+ async def query_impact(
230
+ self,
231
+ changed_files: list[Path],
232
+ max_depth: int = 3,
233
+ ) -> RelevanceResult:
234
+ """Analyze the impact of file changes — "what else could break?"
235
+
236
+ This is a REVERSE traversal: given changes, find what depends on them.
237
+
238
+ Edge cases:
239
+ - Changed file is __init__.py: potentially affects all importers of the package
240
+ - Changed file is a config file: affects everything that reads it
241
+ - Changed file has no dependents: isolated change (rare but valid)
242
+ - Cascade explosion: changed a core utility → 500 dependents.
243
+ Cap at max_result_nodes and prioritize by coupling strength
244
+ (number of edges to the changed file).
245
+ """
246
+ result = RelevanceResult()
247
+ impact_map = await self._storage.impact_analysis(changed_files, max_depth=max_depth)
248
+
249
+ # Add changed files as CRITICAL
250
+ seen_ids: set[str] = set()
251
+ for fp in changed_files:
252
+ for node in await self._storage.get_nodes_by_file(fp):
253
+ if node.node_id not in seen_ids:
254
+ seen_ids.add(node.node_id)
255
+ result.nodes.append(node)
256
+ result.priorities[node.node_id] = ContextPriority.CRITICAL
257
+ result.explanations[node.node_id] = "Changed file"
258
+
259
+ # Add impacted nodes with distance-based priority
260
+ for file_key, affected_nodes in impact_map.items():
261
+ for node in affected_nodes:
262
+ if node.node_id in seen_ids:
263
+ continue
264
+ if len(result.nodes) >= self._max_result_nodes:
265
+ result.truncated = True
266
+ result.warnings.append(
267
+ f"Impact analysis truncated at {self._max_result_nodes} nodes. "
268
+ f"Consider narrowing the scope."
269
+ )
270
+ break
271
+
272
+ seen_ids.add(node.node_id)
273
+ result.nodes.append(node)
274
+ result.priorities[node.node_id] = ContextPriority.HIGH
275
+ result.explanations[node.node_id] = (
276
+ f"Depends on changed file {Path(file_key).name}"
277
+ )
278
+
279
+ return result
280
+
281
+ async def query_scope(
282
+ self,
283
+ directory: Path,
284
+ max_depth: int = 1,
285
+ ) -> RelevanceResult:
286
+ """Get all nodes within a directory scope.
287
+
288
+ Useful for "show me the structure of this module/package."
289
+
290
+ Edge case: directory has 1000+ files (monorepo root). We return
291
+ MODULE-level nodes only and mark as truncated.
292
+ """
293
+ result = RelevanceResult()
294
+
295
+ # This requires a storage method to search by path prefix
296
+ # For now, we do it via the stats + file listing approach
297
+ # In production, we'd add a dedicated index
298
+
299
+ result.warnings.append(
300
+ "Scope queries are limited to indexed files. "
301
+ "Run `codebase-intel analyze` to ensure the graph is current."
302
+ )
303
+ return result
304
+
305
+ # -------------------------------------------------------------------
306
+ # Private helpers
307
+ # -------------------------------------------------------------------
308
+
309
+ async def _search_symbol(self, name: str) -> list[GraphNode]:
310
+ """Search for a symbol by name across the graph.
311
+
312
+ Edge case: name might be:
313
+ - Exact: "UserService" → match name field
314
+ - Qualified: "auth.UserService" → match qualified_name field
315
+ - Partial: "user_serv" → fuzzy match (future)
316
+ """
317
+ # Exact name match
318
+ cursor = await self._storage._db.execute(
319
+ """
320
+ SELECT node_id, kind, name, qualified_name, file_path,
321
+ line_start, line_end, language, content_hash, docstring,
322
+ is_generated, is_external, is_test, is_entry_point,
323
+ metadata_json
324
+ FROM nodes
325
+ WHERE name = ? OR qualified_name = ? OR qualified_name LIKE ?
326
+ LIMIT 50
327
+ """,
328
+ (name, name, f"%.{name}"),
329
+ )
330
+ rows = await cursor.fetchall()
331
+ return [self._storage._row_to_node(row) for row in rows]
332
+
333
+ async def _find_test_files(self, source_node: GraphNode) -> list[GraphNode]:
334
+ """Find test files that test a given source module.
335
+
336
+ Detection heuristics:
337
+ - Explicit TESTS edge in graph
338
+ - File naming convention: foo.py → test_foo.py, foo_test.py
339
+ - Import-based: test file that imports the source module
340
+ """
341
+ results: list[GraphNode] = []
342
+
343
+ # Check for explicit TESTS edges
344
+ cursor = await self._storage._db.execute(
345
+ """
346
+ SELECT source_id FROM edges
347
+ WHERE target_id = ? AND kind = ?
348
+ """,
349
+ (source_node.node_id, EdgeKind.TESTS.value),
350
+ )
351
+ for row in await cursor.fetchall():
352
+ node = await self._storage.get_node(row[0])
353
+ if node:
354
+ results.append(node)
355
+
356
+ # Convention-based: look for test_<name> or <name>_test
357
+ if source_node.kind == NodeKind.MODULE:
358
+ source_name = source_node.name
359
+ test_patterns = [f"test_{source_name}", f"{source_name}_test", f"{source_name}_spec"]
360
+ cursor = await self._storage._db.execute(
361
+ f"""
362
+ SELECT node_id, kind, name, qualified_name, file_path,
363
+ line_start, line_end, language, content_hash, docstring,
364
+ is_generated, is_external, is_test, is_entry_point,
365
+ metadata_json
366
+ FROM nodes
367
+ WHERE kind = 'module' AND is_test = 1
368
+ AND ({' OR '.join(f"name = ?" for _ in test_patterns)})
369
+ """,
370
+ test_patterns,
371
+ )
372
+ for row in await cursor.fetchall():
373
+ node = self._storage._row_to_node(row)
374
+ if node.node_id not in {r.node_id for r in results}:
375
+ results.append(node)
376
+
377
+ return results