codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1 @@
1
+ """Context Orchestrator — assembles relevant context within token budgets."""
@@ -0,0 +1,649 @@
1
+ """Context assembler — the central orchestration engine.
2
+
3
+ This is the brain of the system. Given a task description and token budget,
4
+ it assembles the optimal context payload from all three sources:
5
+ 1. Code graph → relevant files and dependencies
6
+ 2. Decision journal → applicable decisions and constraints
7
+ 3. Quality contracts → rules the agent must follow
8
+
9
+ The assembler must make hard prioritization choices when budget is tight.
10
+ Its goal is to maximize the probability of correct code generation.
11
+
12
+ Edge cases (the hard ones):
13
+ - Budget is tiny (500 tokens): return only the highest-priority single file +
14
+ critical constraints. No decisions, no contracts beyond blocking errors.
15
+ - Budget is huge (100K tokens): still don't dump everything — irrelevant context
16
+ dilutes attention. Cap at what's actually relevant.
17
+ - Task mentions files that don't exist yet: the agent is creating new code.
18
+ Return architectural context (what patterns to follow, what to import from).
19
+ - Task is ambiguous ("improve performance"): return the broadest relevant scope
20
+ with a warning that context may be incomplete.
21
+ - Multiple tasks in one prompt: detect and assemble context for each independently,
22
+ then merge and deduplicate.
23
+ - Contradictory context: Decision says "use pattern A" but Contract says "pattern A
24
+ is forbidden." Surface the contradiction explicitly.
25
+ - All modules partially initialized: graph exists but no decisions. Return what's
26
+ available with clear warnings about what's missing.
27
+ - Assembly timeout: cap at max_assembly_time_ms. If we can't finish, return
28
+ what we have so far with a truncation warning.
29
+ """
30
+
31
from __future__ import annotations

import logging
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any

import tiktoken

from codebase_intel.core.exceptions import (
    BudgetExceededError,
    ErrorContext,
    PartialInitializationError,
)
from codebase_intel.core.types import (
    AssembledContext,
    ContextItem,
    ContextPriority,
    TokenBudget,
)

if TYPE_CHECKING:
    from codebase_intel.contracts.evaluator import ContractEvaluator
    from codebase_intel.contracts.registry import ContractRegistry
    from codebase_intel.core.config import OrchestratorConfig
    from codebase_intel.decisions.store import DecisionStore
    from codebase_intel.graph.query import GraphQueryEngine
58
+
59
logger = logging.getLogger(__name__)

# Shared tiktoken encoder, created lazily on first use.
# cl100k_base is used by GPT-4 and is a reasonable baseline for Claude too.
_encoder: tiktoken.Encoding | None = None


def _get_encoder() -> tiktoken.Encoding:
    """Return the process-wide cl100k_base encoder, building it on first call."""
    global _encoder
    if _encoder is not None:
        return _encoder
    _encoder = tiktoken.get_encoding("cl100k_base")
    return _encoder
71
+
72
+
73
def estimate_tokens(text: str) -> int:
    """Estimate the token count of *text* with the cl100k_base encoding.

    Edge case: different models tokenize differently. cl100k_base is
    a reasonable approximation. The TokenBudget.safety_margin_pct
    accounts for variance (typically 10%).

    Edge case: empty string → 0 tokens (valid).
    Edge case: non-ASCII text → may tokenize differently, but the
    safety margin covers this.
    """
    return len(_get_encoder().encode(text)) if text else 0
87
+
88
+
89
class ContextAssembler:
    """Assembles context from all sources within a token budget."""

    def __init__(
        self,
        config: OrchestratorConfig,
        graph_engine: GraphQueryEngine | None = None,
        decision_store: DecisionStore | None = None,
        contract_registry: ContractRegistry | None = None,
        contract_evaluator: ContractEvaluator | None = None,
        analytics_tracker: Any | None = None,
    ) -> None:
        # Every collaborator except the config is optional — the assembler
        # degrades gracefully when a source has not been initialized.
        self._config = config
        self._analytics = analytics_tracker
        self._evaluator = contract_evaluator
        self._contracts = contract_registry
        self._decisions = decision_store
        self._graph = graph_engine
107
+
108
    async def assemble(
        self,
        task_description: str,
        file_paths: list[Path] | None = None,
        symbol_names: list[str] | None = None,
        budget: TokenBudget | None = None,
    ) -> AssembledContext:
        """Assemble context for an AI agent's task.

        This is the main entry point. It:
        1. Determines what's relevant (via graph)
        2. Gathers applicable decisions
        3. Gathers applicable contracts
        4. Prioritizes and trims to fit budget
        5. Detects contradictions
        6. Returns the assembled payload

        Parameters:
        - task_description: what the agent is trying to do
        - file_paths: files the agent is working on (if known)
        - symbol_names: specific symbols being modified (if known)
        - budget: token budget constraint; defaults to a TokenBudget built
          from the configured default_budget_tokens when omitted
        """
        start_time = time.monotonic()
        # Fall back to the configured default budget when none is given.
        budget = budget or TokenBudget(total=self._config.default_budget_tokens)

        context = AssembledContext(budget_tokens=budget.usable)
        items: list[ContextItem] = []

        # Track what's available vs missing (partial-initialization support).
        available: list[str] = []
        missing: list[str] = []

        # 1. Gather graph context (relevant files and dependencies).
        # Note: skipped silently when file_paths is empty, even if a graph exists.
        if self._graph and file_paths:
            available.append("graph")
            graph_items = await self._gather_graph_context(file_paths, symbol_names)
            items.extend(graph_items)
        elif not self._graph:
            missing.append("graph")
            context.warnings.append("Code graph not available — file dependencies unknown")

        # 2. Gather decision context (same skip behavior as the graph above).
        if self._decisions and file_paths:
            available.append("decisions")
            decision_items = await self._gather_decision_context(file_paths)
            items.extend(decision_items)
        elif not self._decisions:
            missing.append("decisions")
            context.warnings.append("Decision journal not available — architectural context missing")

        # 3. Gather contract context (pre-generation guidance).
        # NOTE(review): unlike graph/decisions, a missing contract registry adds
        # no dedicated warning string here — only the summary warning below.
        # Confirm whether that asymmetry is intentional.
        if self._contracts and file_paths:
            available.append("contracts")
            contract_items = self._gather_contract_context(file_paths)
            items.extend(contract_items)
        elif not self._contracts:
            missing.append("contracts")

        # Warn about partial initialization
        if missing:
            context.warnings.append(
                f"Partial initialization: available={available}, missing={missing}"
            )

        # 4. Prioritize and trim to budget (greedy inclusion in priority order).
        items.sort(key=lambda i: self._priority_sort_key(i))
        fitted_items, dropped = self._fit_to_budget(items, budget.usable)

        context.items = fitted_items
        context.total_tokens = sum(i.estimated_tokens for i in fitted_items)
        context.dropped_count = dropped
        context.truncated = dropped > 0

        # 5. Detect contradictions among the items that actually made the cut.
        context.conflicts = self._detect_contradictions(fitted_items)

        # 6. Check assembly time.
        # NOTE(review): this only warns after the fact — assembly is not
        # actually aborted at max_assembly_time_ms, despite the module
        # docstring's "cap at max_assembly_time_ms" wording.
        elapsed_ms = (time.monotonic() - start_time) * 1000
        context.assembly_time_ms = elapsed_ms

        if elapsed_ms > self._config.max_assembly_time_ms:
            context.warnings.append(
                f"Assembly took {elapsed_ms:.0f}ms (limit: {self._config.max_assembly_time_ms}ms)"
            )

        logger.info(
            "Assembled context: %d items, %d tokens, %d dropped, %.0fms",
            len(context.items),
            context.total_tokens,
            context.dropped_count,
            context.assembly_time_ms,
        )

        # 7. Record analytics (non-blocking — never fail the assembly)
        if self._analytics:
            try:
                # Estimate naive tokens (what reading all requested files would cost)
                naive_tokens = self._estimate_naive_tokens(file_paths or [])
                graph_tokens = sum(
                    i.estimated_tokens for i in context.items if i.source == "graph"
                )
                self._analytics.record_context_event(
                    task_description=task_description,
                    files_requested=len(file_paths or []),
                    naive_tokens=naive_tokens,
                    graph_tokens=graph_tokens,
                    full_tokens=context.total_tokens,
                    budget_tokens=budget.usable,
                    items_included=len(context.items),
                    items_dropped=context.dropped_count,
                    decisions_surfaced=sum(
                        1 for i in context.items if i.item_type == "decision"
                    ),
                    contracts_applied=sum(
                        1 for i in context.items if i.item_type == "contract_rule"
                    ),
                    # NOTE(review): warnings include budget/partial-init notices,
                    # not only drift — "drift_warnings" is a loose proxy here.
                    drift_warnings=len(context.warnings),
                    conflicts_detected=len(context.conflicts),
                    truncated=context.truncated,
                    assembly_time_ms=context.assembly_time_ms,
                )
            except Exception as exc:
                logger.debug("Analytics recording failed (non-fatal): %s", exc)

        return context
234
+
235
+ def _estimate_naive_tokens(self, file_paths: list[Path]) -> int:
236
+ """Estimate how many tokens reading all files in the target dirs would cost.
237
+
238
+ This is the "before" number — what happens without codebase-intel.
239
+ Agents typically read the entire directory or all open files.
240
+ """
241
+ seen: set[Path] = set()
242
+ total_content = ""
243
+
244
+ for fp in file_paths:
245
+ # Read the file itself
246
+ if fp.exists() and fp not in seen:
247
+ seen.add(fp)
248
+ try:
249
+ total_content += fp.read_text(encoding="utf-8", errors="ignore")
250
+ except OSError:
251
+ pass
252
+
253
+ # Read all siblings in the same directory (common agent behavior)
254
+ if fp.parent.exists():
255
+ try:
256
+ for sibling in fp.parent.iterdir():
257
+ if (
258
+ sibling.is_file()
259
+ and sibling not in seen
260
+ and sibling.suffix in (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java")
261
+ ):
262
+ seen.add(sibling)
263
+ try:
264
+ total_content += sibling.read_text(encoding="utf-8", errors="ignore")
265
+ except OSError:
266
+ pass
267
+ except OSError:
268
+ pass
269
+
270
+ return estimate_tokens(total_content)
271
+
272
+ # -------------------------------------------------------------------
273
+ # Context gathering from each source
274
+ # -------------------------------------------------------------------
275
+
276
+ async def _gather_graph_context(
277
+ self,
278
+ file_paths: list[Path],
279
+ symbol_names: list[str] | None = None,
280
+ ) -> list[ContextItem]:
281
+ """Gather context items from the code graph.
282
+
283
+ Strategy:
284
+ - CRITICAL: the files themselves (read content)
285
+ - HIGH: direct dependencies (read content or summary)
286
+ - MEDIUM: transitive dependencies (summary only — save budget)
287
+ - LOW: test files (summary only)
288
+
289
+ Edge case: file is huge (>1000 lines). Include a summary (first 50
290
+ lines + class/function signatures) instead of full content.
291
+ """
292
+ if not self._graph:
293
+ return []
294
+
295
+ items: list[ContextItem] = []
296
+
297
+ result = await self._graph.query_by_files(file_paths)
298
+
299
+ for node in result.nodes:
300
+ priority = result.priorities.get(node.node_id, ContextPriority.LOW)
301
+ explanation = result.explanations.get(node.node_id, "")
302
+
303
+ # Read file content for CRITICAL/HIGH priority
304
+ if priority in (ContextPriority.CRITICAL, ContextPriority.HIGH):
305
+ content = self._read_file_for_context(node.file_path, priority)
306
+ else:
307
+ content = self._summarize_node(node)
308
+
309
+ if content:
310
+ tokens = estimate_tokens(content)
311
+ items.append(ContextItem(
312
+ source="graph",
313
+ item_type="file_content",
314
+ priority=priority,
315
+ estimated_tokens=tokens,
316
+ content=content,
317
+ metadata={
318
+ "file_path": str(node.file_path),
319
+ "node_kind": node.kind.value,
320
+ "explanation": explanation,
321
+ },
322
+ freshness_score=1.0, # Graph is always current
323
+ ))
324
+
325
+ # Add warnings from graph query
326
+ for warning in result.warnings:
327
+ items.append(ContextItem(
328
+ source="graph",
329
+ item_type="warning",
330
+ priority=ContextPriority.HIGH,
331
+ estimated_tokens=estimate_tokens(warning),
332
+ content=warning,
333
+ freshness_score=1.0,
334
+ ))
335
+
336
+ return items
337
+
338
+ async def _gather_decision_context(
339
+ self,
340
+ file_paths: list[Path],
341
+ ) -> list[ContextItem]:
342
+ """Gather applicable decisions for the files being worked on.
343
+
344
+ Strategy:
345
+ - Relevance >= 0.8: HIGH priority (directly related)
346
+ - Relevance >= 0.3: MEDIUM priority (same module/package)
347
+ - Relevance >= 0.1: LOW priority (tangentially related)
348
+
349
+ Stale decisions: included but with lower freshness_score.
350
+ Active constraints: always at least MEDIUM priority.
351
+
352
+ Edge case: many decisions match (10+). Take top 5 by relevance
353
+ to avoid context pollution.
354
+ """
355
+ if not self._decisions:
356
+ return []
357
+
358
+ items: list[ContextItem] = []
359
+ path_set = set(file_paths)
360
+
361
+ scored = await self._decisions.query_by_files(path_set)
362
+ max_decisions = 5
363
+
364
+ for record, relevance in scored[:max_decisions]:
365
+ # Determine priority from relevance score
366
+ if relevance >= 0.8:
367
+ priority = ContextPriority.HIGH
368
+ elif relevance >= 0.3:
369
+ priority = ContextPriority.MEDIUM
370
+ else:
371
+ priority = ContextPriority.LOW
372
+
373
+ # High-priority for active constraints
374
+ if record.constraints and any(c.is_hard for c in record.constraints):
375
+ priority = max(priority, ContextPriority.HIGH, key=lambda p: list(ContextPriority).index(p))
376
+
377
+ # Compute freshness
378
+ freshness = 1.0
379
+ if record.is_stale:
380
+ freshness = 0.3
381
+ elif record.last_validated:
382
+ from datetime import UTC, datetime
383
+ days_since = (datetime.now(UTC) - record.last_validated).days
384
+ freshness = max(0.2, 1.0 - (days_since / self._config.freshness_decay_days))
385
+
386
+ verbose = priority in (ContextPriority.CRITICAL, ContextPriority.HIGH)
387
+ content = record.to_context_string(verbose=verbose)
388
+ tokens = estimate_tokens(content)
389
+
390
+ items.append(ContextItem(
391
+ source="decisions",
392
+ item_type="decision",
393
+ priority=priority,
394
+ estimated_tokens=tokens,
395
+ content=content,
396
+ metadata={
397
+ "decision_id": record.id,
398
+ "relevance": relevance,
399
+ "status": record.status.value,
400
+ },
401
+ freshness_score=freshness,
402
+ ))
403
+
404
+ return items
405
+
406
+ def _gather_contract_context(
407
+ self,
408
+ file_paths: list[Path],
409
+ ) -> list[ContextItem]:
410
+ """Gather applicable quality contracts for pre-generation guidance.
411
+
412
+ Strategy: include all applicable contracts. They're typically compact
413
+ and the agent needs to know ALL rules, not a subset.
414
+
415
+ Edge case: 20+ contracts match. Unlikely in practice (most projects
416
+ have 3-5 contracts). If it happens, sort by priority and take top 10.
417
+ """
418
+ if not self._contracts:
419
+ return []
420
+
421
+ items: list[ContextItem] = []
422
+ seen_ids: set[str] = set()
423
+
424
+ for fp in file_paths:
425
+ applicable = self._contracts.get_for_file(fp)
426
+ for contract in applicable:
427
+ if contract.id in seen_ids:
428
+ continue
429
+ seen_ids.add(contract.id)
430
+
431
+ content = contract.to_context_string(verbose=True)
432
+ tokens = estimate_tokens(content)
433
+
434
+ # Contracts are guidance — always at least MEDIUM priority
435
+ priority = ContextPriority.MEDIUM
436
+ if any(r.severity == ContextPriority.CRITICAL for r in contract.rules):
437
+ priority = ContextPriority.HIGH
438
+
439
+ items.append(ContextItem(
440
+ source="contracts",
441
+ item_type="contract_rule",
442
+ priority=priority,
443
+ estimated_tokens=tokens,
444
+ content=content,
445
+ metadata={
446
+ "contract_id": contract.id,
447
+ "rule_count": len(contract.rules),
448
+ "priority": contract.priority,
449
+ },
450
+ freshness_score=1.0, # Contracts are always current
451
+ ))
452
+
453
+ return items
454
+
455
+ # -------------------------------------------------------------------
456
+ # Budget management
457
+ # -------------------------------------------------------------------
458
+
459
+ def _priority_sort_key(self, item: ContextItem) -> tuple[int, float, int]:
460
+ """Sort key: higher priority first, then fresher, then smaller.
461
+
462
+ This determines the order in which items are included when budget
463
+ is tight. The last items to be included are LOW priority, stale,
464
+ and large.
465
+ """
466
+ priority_order = {
467
+ ContextPriority.CRITICAL: 0,
468
+ ContextPriority.HIGH: 1,
469
+ ContextPriority.MEDIUM: 2,
470
+ ContextPriority.LOW: 3,
471
+ }
472
+ return (
473
+ priority_order.get(item.priority, 3),
474
+ -item.freshness_score, # Higher freshness = earlier
475
+ item.estimated_tokens, # Smaller items first within same priority
476
+ )
477
+
478
+ def _fit_to_budget(
479
+ self,
480
+ items: list[ContextItem],
481
+ budget_tokens: int,
482
+ ) -> tuple[list[ContextItem], int]:
483
+ """Select items that fit within the token budget.
484
+
485
+ Strategy: greedy — include items in priority order until budget
486
+ is exhausted. This is optimal for a knapsack problem when items
487
+ are sorted by value/weight ratio.
488
+
489
+ Edge cases:
490
+ - Single CRITICAL item exceeds budget: include it anyway (the agent
491
+ needs to see the file it's editing, even if nothing else fits).
492
+ But truncate the file content to fit.
493
+ - Budget is 0: return empty list (metadata-only response).
494
+ - All items are tiny: include everything.
495
+
496
+ Returns: (fitted_items, dropped_count)
497
+ """
498
+ if budget_tokens <= 0:
499
+ return [], len(items)
500
+
501
+ fitted: list[ContextItem] = []
502
+ used = 0
503
+ dropped = 0
504
+
505
+ for item in items:
506
+ if used + item.estimated_tokens <= budget_tokens:
507
+ fitted.append(item)
508
+ used += item.estimated_tokens
509
+ elif item.priority == ContextPriority.CRITICAL and not fitted:
510
+ # Must include at least one CRITICAL item, even if it exceeds budget
511
+ # Truncate its content to fit
512
+ truncated = self._truncate_to_fit(item, budget_tokens)
513
+ fitted.append(truncated)
514
+ used += truncated.estimated_tokens
515
+ else:
516
+ dropped += 1
517
+
518
+ return fitted, dropped
519
+
520
+ def _truncate_to_fit(self, item: ContextItem, budget_tokens: int) -> ContextItem:
521
+ """Truncate a context item's content to fit within a token budget.
522
+
523
+ Strategy: keep the first N lines that fit, add a "[truncated]" marker.
524
+
525
+ Edge case: even the first line exceeds budget → include just the
526
+ metadata (file path, type) with no content.
527
+ """
528
+ if item.estimated_tokens <= budget_tokens:
529
+ return item
530
+
531
+ lines = item.content.split("\n")
532
+ truncated_lines: list[str] = []
533
+ used = 0
534
+
535
+ marker = "\n[... truncated to fit token budget ...]"
536
+ marker_tokens = estimate_tokens(marker)
537
+ available = budget_tokens - marker_tokens
538
+
539
+ for line in lines:
540
+ line_tokens = estimate_tokens(line + "\n")
541
+ if used + line_tokens > available:
542
+ break
543
+ truncated_lines.append(line)
544
+ used += line_tokens
545
+
546
+ if not truncated_lines:
547
+ content = f"[File: {item.metadata.get('file_path', 'unknown')} — truncated]"
548
+ else:
549
+ content = "\n".join(truncated_lines) + marker
550
+
551
+ return ContextItem(
552
+ source=item.source,
553
+ item_type=item.item_type,
554
+ priority=item.priority,
555
+ estimated_tokens=estimate_tokens(content),
556
+ content=content,
557
+ metadata={**item.metadata, "truncated": True},
558
+ freshness_score=item.freshness_score,
559
+ )
560
+
561
+ # -------------------------------------------------------------------
562
+ # Contradiction detection
563
+ # -------------------------------------------------------------------
564
+
565
+ def _detect_contradictions(self, items: list[ContextItem]) -> list[str]:
566
+ """Detect contradictions between context items.
567
+
568
+ Types of contradictions:
569
+ 1. Decision says "do X" but contract says "don't do X"
570
+ 2. Two decisions give conflicting guidance for the same code
571
+ 3. Contract rule conflicts (already detected by evaluator)
572
+
573
+ This is heuristic-based — we can't perfectly detect semantic
574
+ contradictions, but we can catch obvious structural ones.
575
+ """
576
+ contradictions: list[str] = []
577
+
578
+ decisions = [i for i in items if i.item_type == "decision"]
579
+ contracts = [i for i in items if i.item_type == "contract_rule"]
580
+
581
+ # Check for stale decision + active contract mismatch
582
+ for decision in decisions:
583
+ if decision.freshness_score < 0.5:
584
+ for contract in contracts:
585
+ # If a contract references the same area as a stale decision,
586
+ # flag it as a potential contradiction
587
+ decision_file = decision.metadata.get("file_path", "")
588
+ contract_scope = contract.metadata.get("contract_id", "")
589
+ if decision_file:
590
+ contradictions.append(
591
+ f"Stale decision {decision.metadata.get('decision_id', '?')} "
592
+ f"may conflict with contract '{contract_scope}'. "
593
+ f"Verify the decision still applies."
594
+ )
595
+
596
+ return contradictions
597
+
598
+ # -------------------------------------------------------------------
599
+ # File reading helpers
600
+ # -------------------------------------------------------------------
601
+
602
+ def _read_file_for_context(
603
+ self,
604
+ file_path: Path,
605
+ priority: ContextPriority,
606
+ ) -> str | None:
607
+ """Read a file's content for inclusion in context.
608
+
609
+ Edge cases:
610
+ - File doesn't exist: return None
611
+ - File is binary: return None
612
+ - File is huge (>1000 lines): return summary for non-CRITICAL priority
613
+ - File has encoding issues: try UTF-8, fall back to latin-1
614
+ """
615
+ if not file_path.exists():
616
+ return None
617
+
618
+ try:
619
+ content = file_path.read_text(encoding="utf-8")
620
+ except UnicodeDecodeError:
621
+ try:
622
+ content = file_path.read_text(encoding="latin-1")
623
+ except Exception:
624
+ return None
625
+ except OSError:
626
+ return None
627
+
628
+ lines = content.split("\n")
629
+ max_lines = 1000 if priority == ContextPriority.CRITICAL else 200
630
+
631
+ if len(lines) > max_lines:
632
+ header = f"# File: {file_path.name} ({len(lines)} lines, showing first {max_lines})\n"
633
+ return header + "\n".join(lines[:max_lines]) + "\n[... truncated ...]"
634
+
635
+ return f"# File: {file_path.name}\n{content}"
636
+
637
+ def _summarize_node(self, node: "GraphNode") -> str:
638
+ """Create a compact summary of a graph node.
639
+
640
+ Used for MEDIUM/LOW priority items to save budget.
641
+ """
642
+ from codebase_intel.core.types import GraphNode # avoid circular
643
+
644
+ parts = [f"{node.kind.value}: {node.qualified_name}"]
645
+ if node.docstring:
646
+ parts.append(f" {node.docstring[:200]}")
647
+ if node.line_range:
648
+ parts.append(f" {node.file_path.name}:{node.line_range.start}-{node.line_range.end}")
649
+ return "\n".join(parts)