doit-toolkit-cli 0.1.10 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of doit-toolkit-cli might be problematic.

Files changed (135)
  1. doit_cli/__init__.py +1356 -0
  2. doit_cli/cli/__init__.py +26 -0
  3. doit_cli/cli/analytics_command.py +616 -0
  4. doit_cli/cli/context_command.py +213 -0
  5. doit_cli/cli/diagram_command.py +304 -0
  6. doit_cli/cli/fixit_command.py +641 -0
  7. doit_cli/cli/hooks_command.py +211 -0
  8. doit_cli/cli/init_command.py +613 -0
  9. doit_cli/cli/memory_command.py +293 -0
  10. doit_cli/cli/roadmapit_command.py +10 -0
  11. doit_cli/cli/status_command.py +117 -0
  12. doit_cli/cli/sync_prompts_command.py +248 -0
  13. doit_cli/cli/validate_command.py +196 -0
  14. doit_cli/cli/verify_command.py +204 -0
  15. doit_cli/cli/workflow_mixin.py +224 -0
  16. doit_cli/cli/xref_command.py +555 -0
  17. doit_cli/formatters/__init__.py +8 -0
  18. doit_cli/formatters/base.py +38 -0
  19. doit_cli/formatters/json_formatter.py +126 -0
  20. doit_cli/formatters/markdown_formatter.py +97 -0
  21. doit_cli/formatters/rich_formatter.py +257 -0
  22. doit_cli/main.py +51 -0
  23. doit_cli/models/__init__.py +139 -0
  24. doit_cli/models/agent.py +74 -0
  25. doit_cli/models/analytics_models.py +384 -0
  26. doit_cli/models/context_config.py +464 -0
  27. doit_cli/models/crossref_models.py +182 -0
  28. doit_cli/models/diagram_models.py +363 -0
  29. doit_cli/models/fixit_models.py +355 -0
  30. doit_cli/models/hook_config.py +125 -0
  31. doit_cli/models/project.py +91 -0
  32. doit_cli/models/results.py +121 -0
  33. doit_cli/models/search_models.py +228 -0
  34. doit_cli/models/status_models.py +195 -0
  35. doit_cli/models/sync_models.py +146 -0
  36. doit_cli/models/template.py +77 -0
  37. doit_cli/models/validation_models.py +175 -0
  38. doit_cli/models/workflow_models.py +319 -0
  39. doit_cli/prompts/__init__.py +5 -0
  40. doit_cli/prompts/fixit_prompts.py +344 -0
  41. doit_cli/prompts/interactive.py +390 -0
  42. doit_cli/rules/__init__.py +5 -0
  43. doit_cli/rules/builtin_rules.py +160 -0
  44. doit_cli/services/__init__.py +79 -0
  45. doit_cli/services/agent_detector.py +168 -0
  46. doit_cli/services/analytics_service.py +218 -0
  47. doit_cli/services/architecture_generator.py +290 -0
  48. doit_cli/services/backup_service.py +204 -0
  49. doit_cli/services/config_loader.py +113 -0
  50. doit_cli/services/context_loader.py +1123 -0
  51. doit_cli/services/coverage_calculator.py +142 -0
  52. doit_cli/services/crossref_service.py +237 -0
  53. doit_cli/services/cycle_time_calculator.py +134 -0
  54. doit_cli/services/date_inferrer.py +349 -0
  55. doit_cli/services/diagram_service.py +337 -0
  56. doit_cli/services/drift_detector.py +109 -0
  57. doit_cli/services/entity_parser.py +301 -0
  58. doit_cli/services/er_diagram_generator.py +197 -0
  59. doit_cli/services/fixit_service.py +699 -0
  60. doit_cli/services/github_service.py +192 -0
  61. doit_cli/services/hook_manager.py +258 -0
  62. doit_cli/services/hook_validator.py +528 -0
  63. doit_cli/services/input_validator.py +322 -0
  64. doit_cli/services/memory_search.py +527 -0
  65. doit_cli/services/mermaid_validator.py +334 -0
  66. doit_cli/services/prompt_transformer.py +91 -0
  67. doit_cli/services/prompt_writer.py +133 -0
  68. doit_cli/services/query_interpreter.py +428 -0
  69. doit_cli/services/report_exporter.py +219 -0
  70. doit_cli/services/report_generator.py +256 -0
  71. doit_cli/services/requirement_parser.py +112 -0
  72. doit_cli/services/roadmap_summarizer.py +209 -0
  73. doit_cli/services/rule_engine.py +443 -0
  74. doit_cli/services/scaffolder.py +215 -0
  75. doit_cli/services/score_calculator.py +172 -0
  76. doit_cli/services/section_parser.py +204 -0
  77. doit_cli/services/spec_scanner.py +327 -0
  78. doit_cli/services/state_manager.py +355 -0
  79. doit_cli/services/status_reporter.py +143 -0
  80. doit_cli/services/task_parser.py +347 -0
  81. doit_cli/services/template_manager.py +710 -0
  82. doit_cli/services/template_reader.py +158 -0
  83. doit_cli/services/user_journey_generator.py +214 -0
  84. doit_cli/services/user_story_parser.py +232 -0
  85. doit_cli/services/validation_service.py +188 -0
  86. doit_cli/services/validator.py +232 -0
  87. doit_cli/services/velocity_tracker.py +173 -0
  88. doit_cli/services/workflow_engine.py +405 -0
  89. doit_cli/templates/agent-file-template.md +28 -0
  90. doit_cli/templates/checklist-template.md +39 -0
  91. doit_cli/templates/commands/doit.checkin.md +363 -0
  92. doit_cli/templates/commands/doit.constitution.md +187 -0
  93. doit_cli/templates/commands/doit.documentit.md +485 -0
  94. doit_cli/templates/commands/doit.fixit.md +181 -0
  95. doit_cli/templates/commands/doit.implementit.md +265 -0
  96. doit_cli/templates/commands/doit.planit.md +262 -0
  97. doit_cli/templates/commands/doit.reviewit.md +355 -0
  98. doit_cli/templates/commands/doit.roadmapit.md +389 -0
  99. doit_cli/templates/commands/doit.scaffoldit.md +458 -0
  100. doit_cli/templates/commands/doit.specit.md +521 -0
  101. doit_cli/templates/commands/doit.taskit.md +304 -0
  102. doit_cli/templates/commands/doit.testit.md +277 -0
  103. doit_cli/templates/config/context.yaml +134 -0
  104. doit_cli/templates/config/hooks.yaml +93 -0
  105. doit_cli/templates/config/validation-rules.yaml +64 -0
  106. doit_cli/templates/github-issue-templates/epic.yml +78 -0
  107. doit_cli/templates/github-issue-templates/feature.yml +116 -0
  108. doit_cli/templates/github-issue-templates/task.yml +129 -0
  109. doit_cli/templates/hooks/.gitkeep +0 -0
  110. doit_cli/templates/hooks/post-commit.sh +25 -0
  111. doit_cli/templates/hooks/post-merge.sh +75 -0
  112. doit_cli/templates/hooks/pre-commit.sh +17 -0
  113. doit_cli/templates/hooks/pre-push.sh +18 -0
  114. doit_cli/templates/memory/completed_roadmap.md +50 -0
  115. doit_cli/templates/memory/constitution.md +125 -0
  116. doit_cli/templates/memory/roadmap.md +61 -0
  117. doit_cli/templates/plan-template.md +146 -0
  118. doit_cli/templates/scripts/bash/check-prerequisites.sh +166 -0
  119. doit_cli/templates/scripts/bash/common.sh +156 -0
  120. doit_cli/templates/scripts/bash/create-new-feature.sh +297 -0
  121. doit_cli/templates/scripts/bash/setup-plan.sh +61 -0
  122. doit_cli/templates/scripts/bash/update-agent-context.sh +675 -0
  123. doit_cli/templates/scripts/powershell/check-prerequisites.ps1 +148 -0
  124. doit_cli/templates/scripts/powershell/common.ps1 +137 -0
  125. doit_cli/templates/scripts/powershell/create-new-feature.ps1 +283 -0
  126. doit_cli/templates/scripts/powershell/setup-plan.ps1 +61 -0
  127. doit_cli/templates/scripts/powershell/update-agent-context.ps1 +406 -0
  128. doit_cli/templates/spec-template.md +159 -0
  129. doit_cli/templates/tasks-template.md +313 -0
  130. doit_cli/templates/vscode-settings.json +14 -0
  131. doit_toolkit_cli-0.1.10.dist-info/METADATA +324 -0
  132. doit_toolkit_cli-0.1.10.dist-info/RECORD +135 -0
  133. doit_toolkit_cli-0.1.10.dist-info/WHEEL +4 -0
  134. doit_toolkit_cli-0.1.10.dist-info/entry_points.txt +2 -0
  135. doit_toolkit_cli-0.1.10.dist-info/licenses/LICENSE +21 -0
doit_cli/services/context_loader.py
@@ -0,0 +1,1123 @@
+"""Context loading service for AI context injection.
+
+This module provides the ContextLoader service that loads and aggregates
+project context (constitution, roadmap, specs) for injection into doit commands.
+"""
+
+import logging
+import os
+import re
+import subprocess
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from ..models.context_config import (
+    CompletedItem,
+    ContextConfig,
+    ContextSource,
+    LoadedContext,
+    SourceConfig,
+    SummarizationConfig,
+)
+from .roadmap_summarizer import RoadmapSummarizer
+
+logger = logging.getLogger(__name__)
+
+# Cache for loaded tiktoken encoding
+_tiktoken_encoding = None
+_tiktoken_available: Optional[bool] = None
+
+# Cache for sklearn availability
+_sklearn_available: Optional[bool] = None
+
+
+def _has_tiktoken() -> bool:
+    """Check if tiktoken is available."""
+    global _tiktoken_available
+    if _tiktoken_available is None:
+        try:
+            import tiktoken  # noqa: F401
+            _tiktoken_available = True
+        except ImportError:
+            _tiktoken_available = False
+    return _tiktoken_available
+
+
+def _has_sklearn() -> bool:
+    """Check if scikit-learn is available."""
+    global _sklearn_available
+    if _sklearn_available is None:
+        try:
+            from sklearn.feature_extraction.text import TfidfVectorizer  # noqa: F401
+            from sklearn.metrics.pairwise import cosine_similarity  # noqa: F401
+            _sklearn_available = True
+        except ImportError:
+            _sklearn_available = False
+    return _sklearn_available
+
+
+def estimate_tokens(text: str) -> int:
+    """Estimate token count for text.
+
+    Uses tiktoken if available, otherwise falls back to character-based estimate.
+
+    Args:
+        text: The text to estimate tokens for.
+
+    Returns:
+        Estimated token count.
+    """
+    global _tiktoken_encoding
+
+    if _has_tiktoken():
+        try:
+            import tiktoken
+            if _tiktoken_encoding is None:
+                _tiktoken_encoding = tiktoken.get_encoding("cl100k_base")
+            return len(_tiktoken_encoding.encode(text))
+        except Exception:
+            pass
+
+    # Fallback: approximately 4 characters per token
+    return max(1, len(text) // 4)
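
The estimator prefers tiktoken's cl100k_base encoding and only falls back to the 4-characters-per-token heuristic when the import fails. A quick sketch of both paths (not part of the package), assuming the wheel above is installed as doit_cli:

from doit_cli.services.context_loader import estimate_tokens

text = "Load the project constitution and roadmap before planning."
print(estimate_tokens(text))  # tiktoken count if installed, else len(text) // 4
print(estimate_tokens(""))    # heuristic fallback floors at 1; tiktoken would report 0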
+
+
+def truncate_content(content: str, max_tokens: int, path: Path) -> tuple[str, bool, int]:
+    """Truncate content while preserving markdown structure.
+
+    Algorithm:
+    1. If content fits within limit, return as-is
+    2. Extract and preserve:
+       - Title (first H1)
+       - All H2 headers with first paragraph under each
+       - Any "Summary" or "Overview" sections in full
+    3. Add truncation notice
+    4. Fill remaining tokens with content from top of file
+
+    Args:
+        content: The markdown content to truncate.
+        max_tokens: Maximum token count.
+        path: Path to the file (for truncation notice).
+
+    Returns:
+        Tuple of (truncated_content, was_truncated, original_tokens).
+    """
+    original_tokens = estimate_tokens(content)
+
+    if original_tokens <= max_tokens:
+        return content, False, original_tokens
+
+    lines = content.split("\n")
+    result_lines: list[str] = []
+    current_tokens = 0
+
+    # Find title (first H1)
+    title_line = None
+    for i, line in enumerate(lines):
+        if line.startswith("# ") and not line.startswith("## "):
+            title_line = line
+            break
+
+    if title_line:
+        result_lines.append(title_line)
+        result_lines.append("")
+        current_tokens = estimate_tokens("\n".join(result_lines))
+
+    # Find Summary/Overview sections and H2 headers
+    i = 0
+    summary_found = False
+    while i < len(lines) and current_tokens < max_tokens * 0.7:
+        line = lines[i]
+
+        # Check for Summary or Overview sections
+        if re.match(r"^##\s+(Summary|Overview)", line, re.IGNORECASE):
+            summary_found = True
+            section_lines = [line]
+            i += 1
+            # Collect entire section
+            while i < len(lines) and not lines[i].startswith("## "):
+                section_lines.append(lines[i])
+                i += 1
+            section_text = "\n".join(section_lines)
+            section_tokens = estimate_tokens(section_text)
+            if current_tokens + section_tokens < max_tokens * 0.9:
+                result_lines.extend(section_lines)
+                current_tokens += section_tokens
+            continue
+
+        # Collect H2 headers with first paragraph
+        if line.startswith("## ") and not summary_found:
+            result_lines.append(line)
+            result_lines.append("")
+            i += 1
+            # Get first paragraph after header
+            paragraph_lines = []
+            while i < len(lines) and lines[i].strip() and not lines[i].startswith("#"):
+                paragraph_lines.append(lines[i])
+                i += 1
+            if paragraph_lines:
+                result_lines.extend(paragraph_lines)
+                result_lines.append("")
+            current_tokens = estimate_tokens("\n".join(result_lines))
+            continue
+
+        i += 1
+
+    # Fill remaining space with content from top
+    target_tokens = max_tokens - 50  # Leave room for truncation notice
+    if current_tokens < target_tokens:
+        # Add content from the beginning that wasn't already added
+        remaining_content = []
+        for line in lines:
+            if line not in result_lines:
+                remaining_content.append(line)
+                test_result = "\n".join(result_lines + remaining_content)
+                if estimate_tokens(test_result) > target_tokens:
+                    remaining_content.pop()
+                    break
+        result_lines.extend(remaining_content)
+
+    # Add truncation notice
+    result_lines.append("")
+    result_lines.append(f"<!-- Content truncated from {original_tokens} to ~{max_tokens} tokens. Full file at: {path} -->")
+
+    truncated_content = "\n".join(result_lines)
+    return truncated_content, True, original_tokens
+
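The four docstring steps are easiest to see on a synthetic file that overflows the budget. A sketch with hypothetical input (not from the package):

from pathlib import Path

from doit_cli.services.context_loader import truncate_content

# A markdown file far larger than the 100-token budget.
doc = "# Title\n\n## Overview\nShort summary.\n\n## Details\n" + "filler line\n" * 500
text, truncated, original = truncate_content(doc, max_tokens=100, path=Path("roadmap.md"))

assert truncated and original > 100
print(text.splitlines()[0])   # "# Title" - the H1 survives
print(text.splitlines()[-1])  # the "<!-- Content truncated ... -->" notice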
+
+def extract_keywords(text: str) -> set[str]:
+    """Extract meaningful keywords from text for similarity matching.
+
+    Args:
+        text: Text to extract keywords from.
+
+    Returns:
+        Set of lowercase keywords.
+    """
+    # Common stop words to exclude
+    stop_words = {
+        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
+        "of", "with", "by", "from", "as", "is", "was", "are", "were", "been",
+        "be", "have", "has", "had", "do", "does", "did", "will", "would",
+        "could", "should", "may", "might", "must", "shall", "can", "need",
+        "this", "that", "these", "those", "it", "its", "they", "them",
+        "their", "we", "our", "you", "your", "i", "my", "me", "he", "she",
+        "his", "her", "him", "who", "what", "which", "when", "where", "why",
+        "how", "all", "each", "every", "both", "few", "more", "most", "other",
+        "some", "such", "no", "not", "only", "own", "same", "so", "than",
+        "too", "very", "just", "also", "now", "new", "first", "last", "long",
+        "great", "little", "old", "big", "small", "high", "low", "good", "bad",
+    }
+
+    # Extract words (alphanumeric sequences)
+    words = re.findall(r"\b[a-zA-Z][a-zA-Z0-9_-]*\b", text.lower())
+
+    # Filter out stop words and short words
+    keywords = {w for w in words if w not in stop_words and len(w) > 2}
+
+    return keywords
+
+
+def compute_similarity_scores(
+    current_text: str, candidate_texts: list[str]
+) -> list[float]:
+    """Compute similarity scores between current text and candidates.
+
+    Uses TF-IDF and cosine similarity if scikit-learn is available,
+    otherwise falls back to keyword overlap.
+
+    Args:
+        current_text: The reference text.
+        candidate_texts: List of texts to compare against.
+
+    Returns:
+        List of similarity scores (0.0 to 1.0) for each candidate.
+    """
+    if not candidate_texts:
+        return []
+
+    if _has_sklearn():
+        try:
+            from sklearn.feature_extraction.text import TfidfVectorizer
+            from sklearn.metrics.pairwise import cosine_similarity
+
+            all_texts = [current_text] + candidate_texts
+            vectorizer = TfidfVectorizer(stop_words="english", max_features=1000)
+            tfidf_matrix = vectorizer.fit_transform(all_texts)
+
+            # Compute cosine similarity between first text and all others
+            similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])
+            return similarities[0].tolist()
+        except Exception:
+            pass
+
+    # Fallback: keyword overlap (Jaccard similarity)
+    current_keywords = extract_keywords(current_text)
+    if not current_keywords:
+        return [0.0] * len(candidate_texts)
+
+    scores = []
+    for candidate in candidate_texts:
+        candidate_keywords = extract_keywords(candidate)
+        if not candidate_keywords:
+            scores.append(0.0)
+            continue
+        intersection = len(current_keywords & candidate_keywords)
+        union = len(current_keywords | candidate_keywords)
+        scores.append(intersection / union if union > 0 else 0.0)
+
+    return scores
+
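When scikit-learn is absent, the scores degrade to Jaccard overlap of the keyword sets above. A sketch with made-up texts; exact values depend on which backend is active, but the ordering should hold under either:

from doit_cli.services.context_loader import compute_similarity_scores

current = "Add token-aware context loading for the roadmap and specs"
candidates = [
    "Context loading with token limits for roadmap files",  # shares keywords
    "Fix PowerShell hook installation on Windows",          # unrelated
]
print(compute_similarity_scores(current, candidates))  # first score > second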
+
+class ContextCondenser:
+    """Service for condensing context when it exceeds token thresholds.
+
+    Uses a two-tier approach:
+    1. Soft threshold: Add guidance prompt for the AI agent to prioritize
+    2. Hard limit: Truncate sources based on priority configuration
+
+    The key insight is that the AI coding agent (Claude, Copilot, etc.) running
+    the command IS the summarizer - no external API calls are needed. The guidance
+    prompt tells the AI how to prioritize the provided context.
+    """
+
+    def __init__(self, config: SummarizationConfig) -> None:
+        """Initialize with summarization configuration.
+
+        Args:
+            config: SummarizationConfig with threshold and priority settings.
+        """
+        self.config = config
+
+    def check_threshold(
+        self, total_tokens: int, max_tokens: int
+    ) -> tuple[bool, bool]:
+        """Check if context exceeds soft or hard thresholds.
+
+        Args:
+            total_tokens: Current total token count.
+            max_tokens: Maximum allowed tokens.
+
+        Returns:
+            Tuple of (exceeds_soft_threshold, exceeds_hard_limit).
+        """
+        soft_threshold = int(max_tokens * (self.config.threshold_percentage / 100.0))
+        return (total_tokens >= soft_threshold, total_tokens >= max_tokens)
+
+    def add_guidance_prompt(
+        self,
+        content: str,
+        current_feature: Optional[str] = None,
+    ) -> str:
+        """Add AI guidance prompt when context exceeds soft threshold.
+
+        The guidance tells the AI coding agent how to prioritize the context.
+        This works identically for Claude, Copilot, Cursor, or any AI agent.
+
+        Args:
+            content: The markdown context content.
+            current_feature: Current feature branch name for highlighting.
+
+        Returns:
+            Markdown content with guidance prepended.
+        """
+        guidance_lines = [
+            "<!-- AI CONTEXT GUIDANCE -->",
+            "**Context Priority Instructions**: This context has been condensed. Please:",
+            "- **Focus on P1/P2 priority items** in the roadmap - these are critical/high priority",
+        ]
+
+        if current_feature:
+            guidance_lines.append(
+                f"- **Pay special attention** to items related to: `{current_feature}`"
+            )
+
+        guidance_lines.extend([
+            "- Treat P3/P4 items as background context only",
+            "- Use completed roadmap items for pattern reference and consistency",
+            "<!-- END GUIDANCE -->",
+            "",
+        ])
+
+        return "\n".join(guidance_lines) + content
+
+    def truncate_if_needed(
+        self,
+        sources: list["ContextSource"],
+        max_tokens: int,
+        source_priorities: list[str],
+    ) -> tuple[list["ContextSource"], int]:
+        """Truncate sources based on priority when exceeding hard limit.
+
+        Removes lowest-priority sources first until under limit.
+        Uses source_priorities from config to determine removal order.
+
+        Args:
+            sources: List of context sources.
+            max_tokens: Maximum total token count.
+            source_priorities: Ordered list of source types to preserve.
+
+        Returns:
+            Tuple of (filtered sources, new total tokens).
+        """
+        total_tokens = sum(s.token_count for s in sources)
+
+        if total_tokens <= max_tokens:
+            return sources, total_tokens
+
+        # Build priority map (lower index = higher priority = keep)
+        priority_map: dict[str, int] = {}
+        for idx, source_type in enumerate(source_priorities):
+            priority_map[source_type] = idx
+
+        # Sort sources by priority (higher priority = lower index = first)
+        # Sources not in priority list get lowest priority (will be removed first)
+        sorted_sources = sorted(
+            sources,
+            key=lambda s: priority_map.get(s.source_type, 999),
+        )
+
+        # Greedily keep sources in priority order, dropping any one source
+        # that would push the running total over the limit
+        kept_sources: list[ContextSource] = []
+        kept_tokens = 0
+
+        for source in sorted_sources:
+            if kept_tokens + source.token_count <= max_tokens:
+                kept_sources.append(source)
+                kept_tokens += source.token_count
+            else:
+                logger.debug(
+                    f"Truncating source '{source.source_type}' due to token limit "
+                    f"({kept_tokens + source.token_count} > {max_tokens})"
+                )
+
+        return kept_sources, kept_tokens
+
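The two-tier split means guidance can fire well before anything is dropped. A sketch of check_threshold, assuming SummarizationConfig can be constructed with defaults and that its default threshold_percentage is 80 (inferred from this file, not verified against context_config.py):

from doit_cli.models.context_config import SummarizationConfig
from doit_cli.services.context_loader import ContextCondenser

condenser = ContextCondenser(SummarizationConfig())
soft, hard = condenser.check_threshold(total_tokens=9_000, max_tokens=10_000)
print(soft, hard)  # True False at an 80% threshold: guidance only, no truncation yet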
+
+def parse_completed_roadmap(content: str) -> list[CompletedItem]:
+    """Parse completed_roadmap.md content into CompletedItem list.
+
+    Handles markdown table format with columns for item, priority, date, branch.
+
+    Args:
+        content: Raw markdown content of completed_roadmap.md
+
+    Returns:
+        List of CompletedItem objects
+    """
+    items: list[CompletedItem] = []
+    lines = content.split("\n")
+
+    # Find table rows (lines starting with |)
+    in_table = False
+    for line in lines:
+        line = line.strip()
+
+        # Header separator row (|---|---|...) marks the start of table data
+        if line.startswith("|") and "---" in line:
+            in_table = True
+            continue
+
+        # Skip header row before separator
+        if line.startswith("|") and not in_table:
+            continue
+
+        # Parse table data rows
+        if line.startswith("|") and in_table:
+            # Split by | and filter empty strings
+            cells = [c.strip() for c in line.split("|") if c.strip()]
+            if len(cells) >= 2:
+                text = cells[0]
+                priority = cells[1] if len(cells) > 1 else ""
+                date_str = cells[2] if len(cells) > 2 else ""
+                branch = cells[3] if len(cells) > 3 else ""
+
+                # Parse date if provided, trying common formats in turn
+                completion_date = None
+                if date_str:
+                    for fmt in ("%Y-%m-%d", "%m/%d/%Y", "%d-%m-%Y"):
+                        try:
+                            completion_date = datetime.strptime(date_str, fmt).date()
+                            break
+                        except ValueError:
+                            continue
+
+                items.append(CompletedItem(
+                    text=text,
+                    priority=priority,
+                    completion_date=completion_date,
+                    feature_branch=branch,
+                    relevance_score=0.0,
+                ))
+
+    return items
+
+
+def format_completed_for_context(items: list[CompletedItem]) -> str:
+    """Format completed items as AI-friendly markdown.
+
+    Creates a structured format that AI agents can semantically match
+    against the current feature being implemented.
+
+    Args:
+        items: List of CompletedItem to format
+
+    Returns:
+        Formatted markdown string for context injection
+    """
+    if not items:
+        return ""
+
+    sections = ["## Completed Roadmap Items", ""]
+    sections.append("*Related completed features for context:*")
+    sections.append("")
+
+    for item in items:
+        # Format the item with available metadata
+        line = f"- **{item.text}**"
+        if item.priority:
+            line += f" ({item.priority})"
+        sections.append(line)
+
+        # Add metadata as sub-items
+        if item.completion_date:
+            sections.append(f"  - Completed: {item.completion_date}")
+        if item.feature_branch:
+            sections.append(f"  - Branch: `{item.feature_branch}`")
+
+    sections.append("")
+    return "\n".join(sections)
+
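The parser treats the separator row as the start of table data, so a well-formed pipe table round-trips cleanly. A sketch with a hypothetical completed_roadmap.md table:

from doit_cli.services.context_loader import (
    format_completed_for_context,
    parse_completed_roadmap,
)

table = """\
| Item | Priority | Date | Branch |
|------|----------|------|--------|
| AI context injection | P1 | 2024-05-01 | 026-ai-context-injection |
"""
items = parse_completed_roadmap(table)
print(items[0].text, items[0].priority)     # AI context injection P1
print(format_completed_for_context(items))  # bullet list ready for the prompt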
+
+class ContextLoader:
+    """Service for loading and aggregating project context.
+
+    This service loads context from various sources (constitution, roadmap,
+    specs) and formats them for injection into AI command prompts.
+    """
+
+    def __init__(
+        self,
+        project_root: Path,
+        config: Optional[ContextConfig] = None,
+        command: Optional[str] = None,
+    ):
+        """Initialize context loader.
+
+        Args:
+            project_root: Root directory of the project.
+            config: Context configuration (loads default if None).
+            command: Current command name for per-command overrides.
+        """
+        self.project_root = project_root
+        self.command = command
+
+        # Load config from file if not provided
+        if config is None:
+            config_path = project_root / ".doit" / "config" / "context.yaml"
+            self.config = ContextConfig.from_yaml(config_path)
+        else:
+            self.config = config
+
+        # Internal cache for loaded content
+        self._cache: dict[Path, str] = {}
+
+    def _is_debug_enabled(self) -> bool:
+        """Check if debug mode is enabled."""
+        return os.environ.get("DOIT_DEBUG", "").lower() in ("1", "true", "yes")
+
+    def _log_debug(self, message: str) -> None:
+        """Log debug message if debug mode is enabled."""
+        if self._is_debug_enabled():
+            print(f"[context] {message}")
+
+    def _read_file(self, path: Path) -> Optional[str]:
+        """Read file content with caching.
+
+        Args:
+            path: Path to the file to read.
+
+        Returns:
+            File content or None if file doesn't exist or can't be read.
+        """
+        if path in self._cache:
+            return self._cache[path]
+
+        if not path.exists():
+            return None
+
+        try:
+            content = path.read_text(encoding="utf-8")
+            self._cache[path] = content
+            return content
+        except (OSError, UnicodeDecodeError) as e:
+            self._log_debug(f"Warning: Could not read {path}: {e}")
+            return None
+
+    def load(self) -> LoadedContext:
+        """Load all configured context sources.
+
+        Returns:
+            LoadedContext with all sources loaded and processed.
+        """
+        if not self.config.enabled:
+            return LoadedContext(loaded_at=datetime.now())
+
+        sources: list[ContextSource] = []
+        total_tokens = 0
+
+        # Get source configs sorted by priority
+        source_configs = [
+            (name, self.config.get_source_config(name, self.command))
+            for name in ["constitution", "roadmap", "completed_roadmap", "current_spec", "related_specs"]
+        ]
+        source_configs.sort(key=lambda x: x[1].priority)
+
+        for source_name, source_config in source_configs:
+            if not source_config.enabled:
+                continue
+
+            # Check total token limit
+            if total_tokens >= self.config.total_max_tokens:
+                self._log_debug(f"Skipping {source_name}: total token limit reached")
+                break
+
+            remaining_tokens = self.config.total_max_tokens - total_tokens
+            max_for_source = min(self.config.max_tokens_per_source, remaining_tokens)
+
+            if source_name == "constitution":
+                source = self.load_constitution(max_tokens=max_for_source)
+                if source:
+                    sources.append(source)
+                    total_tokens += source.token_count
+            elif source_name == "roadmap":
+                source = self.load_roadmap(max_tokens=max_for_source)
+                if source:
+                    sources.append(source)
+                    total_tokens += source.token_count
+            elif source_name == "completed_roadmap":
+                source = self.load_completed_roadmap(max_tokens=max_for_source)
+                if source:
+                    sources.append(source)
+                    total_tokens += source.token_count
+            elif source_name == "current_spec":
+                source = self.load_current_spec(max_tokens=max_for_source)
+                if source:
+                    sources.append(source)
+                    total_tokens += source.token_count
+            elif source_name == "related_specs":
+                related = self.find_related_specs(
+                    max_count=source_config.max_count,
+                    max_tokens_per_spec=max_for_source // max(source_config.max_count, 1),
+                )
+                for spec in related:
+                    if total_tokens + spec.token_count <= self.config.total_max_tokens:
+                        sources.append(spec)
+                        total_tokens += spec.token_count
+
+        any_truncated = any(s.truncated for s in sources)
+
+        self._log_debug(f"Total context: {total_tokens} tokens from {len(sources)} sources")
+
+        # Apply condensation if needed
+        context = LoadedContext(
+            sources=sources,
+            total_tokens=total_tokens,
+            any_truncated=any_truncated,
+            loaded_at=datetime.now(),
+        )
+
+        return self._check_and_apply_condensation(context)
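
Note that each source's budget is the per-source cap clamped to whatever remains of the total, so a late source gets squeezed rather than skipped outright. The arithmetic in isolation, with made-up numbers:

total_max_tokens, max_tokens_per_source = 8_000, 3_000
total_tokens = 6_500  # already spent on higher-priority sources

remaining_tokens = total_max_tokens - total_tokens
max_for_source = min(max_tokens_per_source, remaining_tokens)
print(max_for_source)  # 1500, not 3000: the next source is squeezed, not dropped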
+
+    def _check_and_apply_condensation(
+        self, context: LoadedContext
+    ) -> LoadedContext:
+        """Apply condensation if context exceeds token thresholds.
+
+        Uses a two-tier approach:
+        1. Soft threshold: Adds guidance prompt for AI to prioritize
+        2. Hard limit: Truncates sources based on priority
+
+        The guidance prompt is designed to work with any AI coding agent
+        (Claude, Copilot, Cursor, etc.) - no external API calls needed.
+
+        Args:
+            context: The loaded context to check.
+
+        Returns:
+            LoadedContext, possibly with condensation_guidance set.
+        """
+        if not self.config.summarization.enabled:
+            return context
+
+        condenser = ContextCondenser(self.config.summarization)
+
+        exceeds_soft, exceeds_hard = condenser.check_threshold(
+            context.total_tokens, self.config.total_max_tokens
+        )
+
+        # If exceeds hard limit, truncate sources
+        if exceeds_hard:
+            self._log_debug(
+                f"Context exceeds hard limit ({context.total_tokens} >= "
+                f"{self.config.total_max_tokens}), truncating sources"
+            )
+            new_sources, new_total = condenser.truncate_if_needed(
+                context.sources,
+                self.config.total_max_tokens,
+                self.config.summarization.source_priorities,
+            )
+            context = LoadedContext(
+                sources=new_sources,
+                total_tokens=new_total,
+                any_truncated=True,
+                loaded_at=context.loaded_at,
+            )
+            # Recheck soft threshold after truncation
+            exceeds_soft, _ = condenser.check_threshold(
+                context.total_tokens, self.config.total_max_tokens
+            )
+
+        # If exceeds soft threshold, add guidance prompt
+        if exceeds_soft:
+            soft_threshold = int(
+                self.config.total_max_tokens
+                * (self.config.summarization.threshold_percentage / 100.0)
+            )
+            self._log_debug(
+                f"Context exceeds soft threshold ({context.total_tokens} >= "
+                f"{soft_threshold}), adding guidance prompt"
+            )
+
+            # Get current feature for context-aware guidance
+            current_feature = None
+            branch = self.get_current_branch()
+            if branch:
+                current_feature = self.extract_feature_name(branch)
+
+            # Store guidance flag in context for to_markdown to use
+            context._guidance_prompt = condenser.add_guidance_prompt(
+                "", current_feature
+            ).rstrip()
+
+        return context
+
+    def load_constitution(self, max_tokens: Optional[int] = None) -> Optional[ContextSource]:
+        """Load constitution.md if enabled and exists.
+
+        Args:
+            max_tokens: Maximum token count (uses config default if None).
+
+        Returns:
+            ContextSource for constitution or None if not available.
+        """
+        max_tokens = max_tokens or self.config.max_tokens_per_source
+        path = self.project_root / ".doit" / "memory" / "constitution.md"
+
+        content = self._read_file(path)
+        if content is None:
+            self._log_debug("Constitution not found")
+            return None
+
+        truncated_content, was_truncated, original_tokens = truncate_content(
+            content, max_tokens, path
+        )
+        token_count = estimate_tokens(truncated_content)
+
+        self._log_debug(f"Loaded constitution: {token_count} tokens")
+
+        return ContextSource(
+            source_type="constitution",
+            path=path,
+            content=truncated_content,
+            token_count=token_count,
+            truncated=was_truncated,
+            original_tokens=original_tokens if was_truncated else None,
+        )
+
+    def load_roadmap(self, max_tokens: Optional[int] = None) -> Optional[ContextSource]:
+        """Load roadmap.md if enabled and exists.
+
+        If summarization is enabled AND the roadmap exceeds max_tokens,
+        parses the roadmap and generates a condensed summary with P1/P2
+        items prioritized.
+
+        Args:
+            max_tokens: Maximum token count (uses config default if None).
+
+        Returns:
+            ContextSource for roadmap or None if not available.
+        """
+        max_tokens = max_tokens or self.config.max_tokens_per_source
+        path = self.project_root / ".doit" / "memory" / "roadmap.md"
+
+        content = self._read_file(path)
+        if content is None:
+            self._log_debug("Roadmap not found")
+            return None
+
+        # Check if summarization is needed (enabled AND exceeds limit)
+        original_tokens = estimate_tokens(content)
+        if self.config.summarization.enabled and original_tokens > max_tokens:
+            return self._summarize_roadmap(path, content, max_tokens)
+
+        # Content fits within limit - use truncation only if needed
+        truncated_content, was_truncated, original_tokens = truncate_content(
+            content, max_tokens, path
+        )
+        token_count = estimate_tokens(truncated_content)
+
+        self._log_debug(f"Loaded roadmap: {token_count} tokens")
+
+        return ContextSource(
+            source_type="roadmap",
+            path=path,
+            content=truncated_content,
+            token_count=token_count,
+            truncated=was_truncated,
+            original_tokens=original_tokens if was_truncated else None,
+        )
+
+    def _summarize_roadmap(
+        self, path: Path, content: str, max_tokens: int
+    ) -> ContextSource:
+        """Summarize roadmap content by priority.
+
+        Args:
+            path: Path to roadmap file.
+            content: Raw roadmap content.
+            max_tokens: Maximum token count.
+
+        Returns:
+            ContextSource with summarized roadmap content.
+        """
+        original_tokens = estimate_tokens(content)
+
+        # Get current feature for highlighting
+        branch = self.get_current_branch()
+        current_feature = self.extract_feature_name(branch) if branch else None
+
+        # Parse and summarize
+        summarizer = RoadmapSummarizer(self.config.summarization)
+        items = summarizer.parse_roadmap(content)
+        summary = summarizer.summarize(items, max_tokens, current_feature)
+
+        token_count = estimate_tokens(summary.condensed_text)
+        was_summarized = token_count < original_tokens
+
+        self._log_debug(
+            f"Loaded roadmap (summarized): {token_count} tokens "
+            f"({summary.item_count} items, priorities: {summary.priorities_included})"
+        )
+
+        return ContextSource(
+            source_type="roadmap",
+            path=path,
+            content=summary.condensed_text,
+            token_count=token_count,
+            truncated=was_summarized,
+            original_tokens=original_tokens if was_summarized else None,
+        )
+
+    def load_completed_roadmap(
+        self, max_tokens: Optional[int] = None
+    ) -> Optional[ContextSource]:
+        """Load completed_roadmap.md and format for AI context.
+
+        Parses the completed roadmap items and formats them for semantic
+        matching by the AI coding agent.
+
+        Args:
+            max_tokens: Maximum token count (uses config default if None).
+
+        Returns:
+            ContextSource with formatted completed items, or None if not available.
+        """
+        max_tokens = max_tokens or self.config.max_tokens_per_source
+        path = self.project_root / ".doit" / "memory" / "completed_roadmap.md"
+
+        content = self._read_file(path)
+        if content is None:
+            self._log_debug("Completed roadmap not found")
+            return None
+
+        # Parse completed items
+        items = parse_completed_roadmap(content)
+        if not items:
+            self._log_debug("No completed items found in completed_roadmap.md")
+            return None
+
+        # Limit items based on config
+        max_count = self.config.summarization.completed_items_max_count
+        items = items[:max_count]
+
+        # Format for context
+        formatted_content = format_completed_for_context(items)
+        token_count = estimate_tokens(formatted_content)
+
+        # Truncate if needed
+        if token_count > max_tokens:
+            truncated_content, was_truncated, original_tokens = truncate_content(
+                formatted_content, max_tokens, path
+            )
+            token_count = estimate_tokens(truncated_content)
+        else:
+            truncated_content = formatted_content
+            was_truncated = False
+            original_tokens = token_count
+
+        self._log_debug(f"Loaded completed_roadmap: {token_count} tokens ({len(items)} items)")
+
+        return ContextSource(
+            source_type="completed_roadmap",
+            path=path,
+            content=truncated_content,
+            token_count=token_count,
+            truncated=was_truncated,
+            original_tokens=original_tokens if was_truncated else None,
+        )
+
+    def get_current_branch(self) -> Optional[str]:
+        """Get current git branch name.
+
+        Returns:
+            Branch name or None if not in a git repo or git unavailable.
+        """
+        try:
+            result = subprocess.run(
+                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+                capture_output=True,
+                text=True,
+                cwd=self.project_root,
+                timeout=5,
+            )
+            if result.returncode == 0:
+                return result.stdout.strip()
+        except (subprocess.SubprocessError, FileNotFoundError, OSError):
+            pass
+        return None
+
+    def extract_feature_name(self, branch: str) -> Optional[str]:
+        """Extract feature name from branch name.
+
+        Expects branch format like '026-ai-context-injection' or 'feature/026-name'.
+
+        Args:
+            branch: Git branch name.
+
+        Returns:
+            Feature name (e.g., '026-ai-context-injection') or None if not matched.
+        """
+        # Match patterns like: 026-feature-name, feature/026-name
+        patterns = [
+            r"^(\d{3}-[\w-]+)",  # 026-feature-name
+            r"^feature/(\d{3}-[\w-]+)",  # feature/026-name
+            r"^feat/(\d{3}-[\w-]+)",  # feat/026-name
+        ]
+
+        for pattern in patterns:
+            match = re.match(pattern, branch)
+            if match:
+                return match.group(1)
+
+        return None
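
The same three patterns, applied standalone to show which branch shapes qualify without constructing a loader:

import re

PATTERNS = [r"^(\d{3}-[\w-]+)", r"^feature/(\d{3}-[\w-]+)", r"^feat/(\d{3}-[\w-]+)"]

def feature_of(branch: str):
    """Mirror of extract_feature_name, for illustration only."""
    for pattern in PATTERNS:
        match = re.match(pattern, branch)
        if match:
            return match.group(1)
    return None

print(feature_of("026-ai-context-injection"))  # 026-ai-context-injection
print(feature_of("feature/026-context"))       # 026-context
print(feature_of("main"))                      # None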
+
932
+ def load_current_spec(self, max_tokens: Optional[int] = None) -> Optional[ContextSource]:
933
+ """Load current feature spec based on branch name.
934
+
935
+ Args:
936
+ max_tokens: Maximum token count (uses config default if None).
937
+
938
+ Returns:
939
+ ContextSource for current spec or None if not available.
940
+ """
941
+ max_tokens = max_tokens or self.config.max_tokens_per_source
942
+
943
+ branch = self.get_current_branch()
944
+ if not branch:
945
+ self._log_debug("Not in a git repository or git unavailable")
946
+ return None
947
+
948
+ feature_name = self.extract_feature_name(branch)
949
+ if not feature_name:
950
+ self._log_debug(f"Branch '{branch}' does not match feature pattern")
951
+ return None
952
+
953
+ # Look for spec in specs directory
954
+ spec_path = self.project_root / "specs" / feature_name / "spec.md"
955
+ if not spec_path.exists():
956
+ self._log_debug(f"Spec not found at {spec_path}")
957
+ return None
958
+
959
+ content = self._read_file(spec_path)
960
+ if content is None:
961
+ return None
962
+
963
+ truncated_content, was_truncated, original_tokens = truncate_content(
964
+ content, max_tokens, spec_path
965
+ )
966
+ token_count = estimate_tokens(truncated_content)
967
+
968
+ self._log_debug(f"Loaded current_spec ({feature_name}): {token_count} tokens")
969
+
970
+ return ContextSource(
971
+ source_type="current_spec",
972
+ path=spec_path,
973
+ content=truncated_content,
974
+ token_count=token_count,
975
+ truncated=was_truncated,
976
+ original_tokens=original_tokens if was_truncated else None,
977
+ )
978
+
979
+ def get_memory_files(self) -> list[Path]:
980
+ """Get list of governance memory files.
981
+
982
+ Returns paths to constitution.md, roadmap.md, and completed_roadmap.md
983
+ if they exist.
984
+
985
+ Returns:
986
+ List of paths to governance memory files.
987
+ """
988
+ memory_dir = self.project_root / ".doit" / "memory"
989
+ files = []
990
+
991
+ governance_files = [
992
+ "constitution.md",
993
+ "roadmap.md",
994
+ "completed_roadmap.md",
995
+ ]
996
+
997
+ for filename in governance_files:
998
+ path = memory_dir / filename
999
+ if path.exists():
1000
+ files.append(path)
1001
+
1002
+ return files
1003
+
1004
+ def get_spec_files(self) -> list[Path]:
1005
+ """Get list of all spec.md files in the specs directory.
1006
+
1007
+ Returns:
1008
+ List of paths to spec.md files.
1009
+ """
1010
+ specs_dir = self.project_root / "specs"
1011
+ files = []
1012
+
1013
+ if not specs_dir.exists():
1014
+ return files
1015
+
1016
+ for spec_dir in specs_dir.iterdir():
1017
+ if not spec_dir.is_dir():
1018
+ continue
1019
+ spec_path = spec_dir / "spec.md"
1020
+ if spec_path.exists():
1021
+ files.append(spec_path)
1022
+
1023
+ return sorted(files)
1024
+
1025
+ def get_all_searchable_files(self) -> list[Path]:
1026
+ """Get all files that can be searched.
1027
+
1028
+ Returns:
1029
+ Combined list of memory and spec files.
1030
+ """
1031
+ return self.get_memory_files() + self.get_spec_files()
1032
+
1033
+ def find_related_specs(
1034
+ self,
1035
+ max_count: int = 3,
1036
+ max_tokens_per_spec: Optional[int] = None,
1037
+ similarity_threshold: float = 0.3,
1038
+ ) -> list[ContextSource]:
1039
+ """Find specs related to current feature.
1040
+
1041
+ Args:
1042
+ max_count: Maximum number of related specs to return.
1043
+ max_tokens_per_spec: Maximum tokens per spec (uses config default if None).
1044
+ similarity_threshold: Minimum similarity score to include.
1045
+
1046
+ Returns:
1047
+ List of ContextSource objects for related specs.
1048
+ """
1049
+ max_tokens = max_tokens_per_spec or self.config.max_tokens_per_source
1050
+
1051
+ # Get current spec for comparison
1052
+ current_spec = self.load_current_spec(max_tokens=max_tokens)
1053
+ if not current_spec:
1054
+ return []
1055
+
1056
+ current_feature = self.extract_feature_name(self.get_current_branch() or "")
1057
+ if not current_feature:
1058
+ return []
1059
+
1060
+ # Find all spec directories
1061
+ specs_dir = self.project_root / "specs"
1062
+ if not specs_dir.exists():
1063
+ return []
1064
+
1065
+ candidate_specs: list[tuple[Path, str]] = []
1066
+ for spec_dir in specs_dir.iterdir():
1067
+ if not spec_dir.is_dir():
1068
+ continue
1069
+ if spec_dir.name == current_feature:
1070
+ continue # Skip current spec
1071
+
1072
+ spec_path = spec_dir / "spec.md"
1073
+ if spec_path.exists():
1074
+ content = self._read_file(spec_path)
1075
+ if content:
1076
+ # Use title and summary for matching
1077
+ # Extract first 1000 chars for efficiency
1078
+ candidate_specs.append((spec_path, content[:1000]))
1079
+
1080
+ if not candidate_specs:
1081
+ return []
1082
+
1083
+ # Compute similarity scores
1084
+ candidate_texts = [text for _, text in candidate_specs]
1085
+ scores = compute_similarity_scores(current_spec.content[:1000], candidate_texts)
1086
+
1087
+ # Filter by threshold and sort by score
1088
+ scored_specs = [
1089
+ (score, path, text)
1090
+ for score, (path, text) in zip(scores, candidate_specs)
1091
+ if score >= similarity_threshold
1092
+ ]
1093
+ scored_specs.sort(key=lambda x: x[0], reverse=True)
1094
+
1095
+ # Load top specs
1096
+ related: list[ContextSource] = []
1097
+ for score, path, _ in scored_specs[:max_count]:
1098
+ content = self._read_file(path)
1099
+ if content is None:
1100
+ continue
1101
+
1102
+ truncated_content, was_truncated, original_tokens = truncate_content(
1103
+ content, max_tokens, path
1104
+ )
1105
+ token_count = estimate_tokens(truncated_content)
1106
+
1107
+ self._log_debug(
1108
+ f"Loaded related_spec ({path.parent.name}): "
1109
+ f"{token_count} tokens (similarity: {score:.2f})"
1110
+ )
1111
+
1112
+ related.append(
1113
+ ContextSource(
1114
+ source_type="related_specs",
1115
+ path=path,
1116
+ content=truncated_content,
1117
+ token_count=token_count,
1118
+ truncated=was_truncated,
1119
+ original_tokens=original_tokens if was_truncated else None,
1120
+ )
1121
+ )
1122
+
1123
+ return related