code-finder 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_context/__init__.py +33 -0
- claude_context/agentic_integration.py +309 -0
- claude_context/ast_chunker.py +646 -0
- claude_context/config.py +239 -0
- claude_context/context_manager.py +627 -0
- claude_context/embeddings.py +307 -0
- claude_context/embeddings_interface.py +226 -0
- claude_context/enhanced_ast_chunker.py +1129 -0
- claude_context/explorer.py +951 -0
- claude_context/explorer_with_context.py +1008 -0
- claude_context/indexer.py +893 -0
- claude_context/markdown_chunker.py +421 -0
- claude_context/mode_handler.py +1774 -0
- claude_context/query_metrics.py +164 -0
- claude_context/question_generator.py +800 -0
- claude_context/readme_extractor.py +485 -0
- claude_context/repository_adapter.py +399 -0
- claude_context/search.py +493 -0
- claude_context/skills/__init__.py +11 -0
- claude_context/skills/_cli_common.py +74 -0
- claude_context/skills/_index_manager.py +98 -0
- claude_context/skills/api_surface.py +219 -0
- claude_context/skills/evidence_retrieval.py +151 -0
- claude_context/skills/grounded_review.py +212 -0
- claude_context/synthesis/__init__.py +8 -0
- claude_context/synthesis/editor_agent.py +391 -0
- claude_context/synthesis/llm_synthesizer.py +153 -0
- claude_context/synthesis/logic_explainer.py +235 -0
- claude_context/synthesis/multi_review_pipeline.py +717 -0
- claude_context/synthesis/prompt_builder.py +439 -0
- claude_context/synthesis/providers.py +115 -0
- claude_context/synthesis/validators.py +458 -0
- code_finder-0.1.0.dist-info/METADATA +823 -0
- code_finder-0.1.0.dist-info/RECORD +37 -0
- code_finder-0.1.0.dist-info/WHEEL +5 -0
- code_finder-0.1.0.dist-info/entry_points.txt +4 -0
- code_finder-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1774 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Documentation Generation Mode Handler
|
|
3
|
+
|
|
4
|
+
Integrates the unified GENERATE documentation mode (with optional interactive Q&A)
|
|
5
|
+
with the existing Vibe2Doc pipeline.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import json
|
|
10
|
+
import asyncio
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, Any, Optional, List, Tuple, TYPE_CHECKING
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
import sys
|
|
17
|
+
import hashlib
|
|
18
|
+
|
|
19
|
+
from .config import ClaudeContextConfig
|
|
20
|
+
from .embeddings import LocalEmbeddings
|
|
21
|
+
from .indexer import RepositoryIndexer
|
|
22
|
+
from .search import create_hybrid_searcher
|
|
23
|
+
from .context_manager import ExternalContextManager, ContextItem
|
|
24
|
+
from .explorer_with_context import InteractiveExplorerWithContext
|
|
25
|
+
from .repository_adapter import create_repository_adapter
|
|
26
|
+
from .readme_extractor import extract_from_readme, InstallationInfo, CodeExample
|
|
27
|
+
from .synthesis.logic_explainer import LogicExplainer
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING: # pragma: no cover - type checking only
|
|
30
|
+
from .synthesis.editor_agent import EditorAgent, EditorReview
|
|
31
|
+
# simple_doc_generator removed - LLM synthesis is now mandatory
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DocumentationMode(Enum):
    """Documentation generation mode.

    Only one mode is defined; whether a Q&A session runs is controlled by
    the separate ``interactive`` flag rather than by a distinct enum member.
    """
    GENERATE = "generate"  # Unified mode with optional interactive Q&A
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class ModeContext:
    """Context for documentation generation.

    Carries the inputs, the wired-up components and the accumulated outputs
    of a single documentation run.

    Two-pass (editor) workflow:

    - ``generated_docs`` holds the first-pass output, including the
      ``SYNTHESIZED_*`` sections emitted by the LLM.
    - ``editor_reviews`` holds the review objects returned by the editor
      agent when the editor pass runs.
    - ``editor_outputs`` holds the revised Markdown, keyed by
      ``editor/<section>``.

    The original design notes also mention ``raw_summaries`` and
    ``editor_requests`` collections; no fields with those names are defined
    on this class.
    """
    mode: DocumentationMode
    repository_path: str
    output_path: str = "./docs"
    interactive: bool = False  # Enable Q&A session

    # Claude Context components
    config: Optional[ClaudeContextConfig] = None
    indexer: Optional[RepositoryIndexer] = None
    searcher: Optional[Any] = None
    explorer: Optional[InteractiveExplorerWithContext] = None

    # External context
    context_manager: Optional[ExternalContextManager] = None
    context_files: List[str] = field(default_factory=list)
    jira_config: Optional[Dict] = None

    # Q&A session data
    qa_history: List[Dict] = field(default_factory=list)
    exploration_complete: bool = False

    # Generated content
    documentation_plan: List[str] = field(default_factory=list)
    generated_docs: Dict[str, str] = field(default_factory=dict)
    structured_evidence: Dict[str, Any] = field(default_factory=dict)
    # Synthesis
    synthesis_enabled: bool = False
    synthesis_template: Optional[Dict[str, Any]] = None
    user_focused: bool = False  # Use user-focused questions (quickstart first) vs technical (architecture first)

    # Logic explainer and rationale harvesting
    logic_explainer: Optional[LogicExplainer] = None
    logic_summaries: List[Dict[str, Any]] = field(default_factory=list)
    rationale_records: Dict[str, Any] = field(default_factory=dict)

    # Editor workflow integration (populated when editor pass is enabled)
    editor_enabled: bool = False
    editor_agent: Optional["EditorAgent"] = None
    editor_reviews: List["EditorReview"] = field(default_factory=list)
    editor_outputs: Dict[str, str] = field(default_factory=dict)

    # Multi-README support (default: discover all READMEs)
    single_readme_mode: bool = False  # When True, only use root README (legacy behavior)

    # Editor workflow (see Two-Pass Documentation plan in doc.plan.md).
    # The editor agent attaches its reviews and revised Markdown through the
    # `editor_reviews` / `editor_outputs` fields above, stored alongside the
    # synthesized drafts rather than replacing them. The plan's
    # `editor_feedback` and `editor_docs` names are not defined here.
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class DocumentationModeHandler:
|
|
108
|
+
"""
|
|
109
|
+
Handles the unified documentation generation mode (GENERATE).
|
|
110
|
+
Bridges Claude Context with the existing Vibe2Doc pipeline.
|
|
111
|
+
"""
|
|
112
|
+
|
|
113
|
+
def __init__(self):
    """Create a handler that has no active documentation context yet."""
    logger.info("DocumentationModeHandler initialized")
    # Filled in by initialize_mode() once a run has been prepared.
    self.current_context: Optional[ModeContext] = None
|
|
117
|
+
|
|
118
|
+
async def initialize_mode(
    self,
    repository_path: str,
    output_path: str = "./docs",
    interactive: bool = False,
    context_files: Optional[List[str]] = None,
    jira_config: Optional[Dict] = None,
    synthesis: bool = False,
    synthesis_template: Optional[Dict[str, Any]] = None,
    user_focused: bool = False,
    editor_enabled: Optional[bool] = None,
    single_readme_mode: bool = False,
) -> ModeContext:
    """
    Initialize documentation generation.

    Builds a ModeContext, indexes the repository, creates the searcher and
    the supporting components, and stores the context on
    ``self.current_context``.

    Args:
        repository_path: Path to the repository
        output_path: Where to save documentation
        interactive: Enable Q&A session (default: False)
        context_files: External context files; files that fail to load are
            logged and skipped
        jira_config: Jira configuration (stored on the context; the
            integration itself is not implemented yet)
        synthesis: Deprecated and ignored - synthesis is always enabled
        synthesis_template: Custom template
        user_focused: Use user-focused prompts
        editor_enabled: Enable the editor second pass. When None, the
            V2D_ENABLE_EDITOR environment variable decides ("1", "true",
            "yes" or "on", case-insensitive, enables it); otherwise disabled
        single_readme_mode: Use only root README (legacy behavior). Default: False (multi-README)

    Returns:
        Initialized ModeContext
    """
    mode_label = "interactive" if interactive else "automatic"
    logger.info(f"Initializing {mode_label} documentation generation for {repository_path}")

    # Create context
    context = ModeContext(
        mode=DocumentationMode.GENERATE,
        repository_path=repository_path,
        output_path=output_path,
        interactive=interactive,
        context_files=context_files or [],
        jira_config=jira_config,
        synthesis_enabled=True,  # Always enabled
        synthesis_template=synthesis_template,
        user_focused=user_focused,
        single_readme_mode=single_readme_mode,
    )

    # An explicit argument wins; the environment variable is only a fallback.
    editor_flag = editor_enabled
    if editor_flag is None:
        env_editor = os.getenv("V2D_ENABLE_EDITOR")
        if env_editor is not None:
            # Strip so values such as "true " or " 1" are still recognised.
            editor_flag = env_editor.strip().lower() in {"1", "true", "yes", "on"}
    context.editor_enabled = bool(editor_flag)

    # Initialize Claude Context components
    context.config = ClaudeContextConfig()
    context.config.milvus_db_path = f"./data/{Path(repository_path).name}_milvus.db"

    # Initialize embeddings
    embeddings = LocalEmbeddings(context.config.embedding_model)

    # Initialize indexer
    from .config import MilvusManager
    milvus_manager = MilvusManager(context.config)

    context.indexer = RepositoryIndexer(
        config=context.config,
        embeddings=embeddings,
        milvus_manager=milvus_manager,
    )

    # Index the repository
    logger.info("Indexing repository...")
    adapter = create_repository_adapter(repository_path)
    stats = await self._index_repository_async(context.indexer, adapter)
    # Use .get() for every statistic: a missing key in the stats mapping must
    # not abort initialization after indexing has already completed.
    files_indexed = stats.get('files_indexed', 0)
    chunk_count = stats.get('chunks_created', stats.get('total_chunks', 0))
    logger.info(f"Indexed {files_indexed} files, {chunk_count} chunks")

    # Create searcher
    context.searcher = create_hybrid_searcher(
        config=context.config,
        embeddings=embeddings,
        milvus_manager=milvus_manager,
    )

    # Initialize context manager (always available)
    context.context_manager = ExternalContextManager()

    # Initialize logic explainer for rationale-rich summaries
    try:
        context.logic_explainer = LogicExplainer()
    except Exception as exc:  # pragma: no cover - defensive
        logger.warning("LogicExplainer initialization failed: %s", exc)
        context.logic_explainer = None

    # Initialize editor agent (optional second pass)
    if context.editor_enabled:
        try:
            from .synthesis.editor_agent import EditorAgent

            context.editor_agent = EditorAgent(searcher=context.searcher)
            logger.info("Editor agent initialized (two-pass workflow enabled)")
        except Exception as exc:  # pragma: no cover - defensive
            # The editor is optional: fall back to single-pass generation.
            logger.warning("Editor agent initialization failed: %s", exc)
            context.editor_enabled = False

    # Load context files
    for file_path in context_files or []:
        try:
            context.context_manager.add_file(file_path)
            logger.info(f"Added context file: {file_path}")
        except Exception as e:
            logger.warning(f"Failed to add context file {file_path}: {e}")

    # TODO: Add Jira integration if configured
    if jira_config:
        logger.info("Jira integration configured but not yet implemented")

    # Initialize explorer ONLY if interactive mode
    if interactive:
        context.explorer = InteractiveExplorerWithContext(
            searcher=context.searcher,
            context_manager=context.context_manager,
            enable_llm_fallback=True,  # Enable LLM query interpretation for conceptual questions
        )
        logger.info("Interactive mode: Q&A session enabled (with LLM query interpretation)")

    self.current_context = context
    return context
|
|
244
|
+
|
|
245
|
+
async def _index_repository_async(self, indexer, adapter):
|
|
246
|
+
"""Async wrapper for repository indexing"""
|
|
247
|
+
return await asyncio.to_thread(indexer.index_from_adapter, adapter)
|
|
248
|
+
|
|
249
|
+
# AUTO mode removed - LLM synthesis is now mandatory for documentation
|
|
250
|
+
# Use run_generate_mode() with interactive=False for automatic documentation generation
|
|
251
|
+
|
|
252
|
+
async def run_generate_mode(self, context: ModeContext) -> Dict[str, str]:
    """
    Run unified documentation generation with optional Q&A.

    Flow:
    1. Auto-extract evidence (README + code) - ALWAYS
    2. [Optional] Interactive Q&A session
    3. Generate documentation with LLM synthesis - ALWAYS
    4. [Optional] Editor pass over the synthesized sections

    Args:
        context: The mode context (includes interactive flag)

    Returns:
        Generated documentation. The same mapping is stored on
        ``context.generated_docs``.

    Raises:
        RuntimeError: If LLM synthesis fails. A failure in the optional
            editor pass is logged and does not discard the synthesized docs.
    """
    mode_label = "interactive" if context.interactive else "automatic"
    logger.info(f"Running {mode_label} documentation generation...")

    # Show context summary
    context_summary = context.context_manager.get_summary() if context.context_manager else {}
    logger.info(f"Using {context_summary.get('total_items', 0)} context items")

    # Phase 1: Auto-extract evidence (ALWAYS)
    if context.interactive:
        print("\n🔍 Phase 1: Auto-extracting evidence...")

    # NOTE(review): evidence is collected before the Q&A session below, so
    # answers gathered in Phase 2 are not part of this snapshot - confirm
    # that the synthesis step reads context.qa_history itself.
    structured_evidence = self._collect_structured_evidence(context)
    # NOTE: The editor pass consumes both `structured_evidence` and the
    # synthesized docs to critique the first draft. Keep the drafts
    # untouched so the second pass can diff against the raw output.
    context.structured_evidence = structured_evidence

    # Show what was auto-extracted
    if context.interactive:
        essentials = structured_evidence.get("essentials", {})
        print("\n✅ Auto-extracted from README:")
        if essentials.get("installation"):
            print(" ✓ Installation command")
        if essentials.get("quickstart"):
            print(" ✓ Quickstart example")
        if essentials.get("authentication"):
            print(" ✓ Authentication setup")

    # Phase 2: Optional Q&A session
    if context.interactive:
        if not context.explorer:
            logger.warning("Interactive mode requested but explorer not initialized")
        else:
            print("\n" + "=" * 60)
            print("💬 Phase 2: Interactive Q&A Session")
            print("=" * 60)
            print("\nAuto-extraction complete. You can now:")
            print(" • Ask questions to add insights")
            print(" • Type 'generate docs' to create documentation")
            print("")

            if not context.exploration_complete:
                await self._run_interactive_session(context)

            # Show Q&A summary
            if context.qa_history:
                # `or 0` keeps an explicit None confidence from breaking
                # the comparison.
                high_conf = sum(
                    1 for qa in context.qa_history
                    if (qa.get("confidence") or 0) >= 0.7
                )
                print("\n📊 Q&A Summary:")
                print(f" • {len(context.qa_history)} questions asked")
                print(f" • {high_conf} high-confidence answers")
                print(" • These insights will be incorporated into documentation\n")

    # Phase 3: LLM synthesis (ALWAYS REQUIRED)
    try:
        logger.info("🤖 Starting LLM synthesis...")
        docs = self._run_synthesis_overview(context)
        logger.info(f"✅ Synthesis completed: {len(docs)} sections generated")
    except Exception as e:
        logger.error(f"❌ Synthesis failed: {e}")
        print(f"\n❌ Error: LLM synthesis failed: {e}")
        print("\nLLM synthesis is required for documentation generation.")
        print("Please check:")
        print(" 1. API key is set correctly (ANTHROPIC_API_KEY or OPENAI_API_KEY)")
        print(" 2. API key has sufficient quota")
        print(" 3. Network connectivity")
        raise RuntimeError(f"Documentation generation failed: {e}") from e

    # Publish the first-pass drafts before the optional second pass so they
    # are available on the context even if the editor fails.
    context.generated_docs = docs

    # Optional Phase 4: Editor pass (second stage)
    if context.editor_enabled:
        try:
            self._run_editor_pass(context, structured_evidence, docs)
        except Exception as exc:
            # The editor is an optional refinement; losing it must not cost
            # the caller the documentation that was already synthesized.
            logger.warning(
                "Editor pass failed; keeping first-pass documentation: %s",
                exc,
                exc_info=True,
            )

    return docs
|
|
340
|
+
|
|
341
|
+
def _collect_structured_evidence(self, context: ModeContext) -> Dict[str, Any]:
    """
    Collect evidence with clear priority tiers (EVIDENCE-FIRST approach).

    Returns structured evidence instead of flat list:
    - essentials: Installation, quickstart, auth from README (TIER 1 - MANDATORY)
    - usage: Examples, configuration from README/samples, Q&A insights and
      code docstrings (TIER 2 - HIGH PRIORITY)
    - implementation: Code analysis, patterns (TIER 3 - SUPPLEMENTARY)
    - rationale: logic summaries, decisions and Q&A (TIER 4)

    This ensures README fundamentals appear in docs before code internals.

    Side effect: ``context.rationale_records`` is set to the rationale tier.
    """
    evidence: Dict[str, Any] = {
        "essentials": {},
        "usage": {},
        "implementation": {},
        "rationale": {
            "logic": [],
            "decisions": [],
            "qa": [],
        },
    }

    # TIER 1: Extract essentials from README(s) (HIGHEST PRIORITY)
    # Multi-README discovery: primary README for essentials, others for usage
    readmes = self._get_readme_contents(context)

    if not readmes:
        logger.warning("⚠️ No README found - essentials tier will be empty")
    else:
        # The first entry is the primary README (root, or shallowest if no root)
        primary_readme_depth = readmes[0]['depth']
        if primary_readme_depth > 0:
            logger.warning(f"⚠️ No root README found. Using {readmes[0]['path']} as primary (depth={primary_readme_depth})")

        # Initialize module_readmes list for usage tier
        evidence["usage"]["module_readmes"] = []

        for index, readme in enumerate(readmes):
            readme_path = readme['path']
            readme_content = readme['content']
            depth = readme['depth']

            logger.info(f"📖 Extracting from {readme_path} (depth={depth})...")
            readme_essentials = extract_from_readme(readme_content)

            # Exactly one README is primary. Comparing depths instead would
            # let every README at the shallowest depth overwrite the
            # essentials in turn and drop the earlier ones entirely.
            if index == 0:
                self._evidence_from_primary_readme(evidence, readme_path, readme_essentials)
                continue

            module_data = self._evidence_from_module_readme(
                readme_path, depth, readme_content, readme_essentials
            )

            # Only add if we have some content
            if module_data['overview'] or module_data['examples']:
                evidence["usage"]["module_readmes"].append(module_data)
                ex_count = len(module_data['examples'])
                fallback_marker = " (raw fallback)" if module_data.get('raw_fallback') else ""
                logger.info(f" ✅ Module README: {readme_path} - overview={bool(module_data['overview'])}, examples={ex_count}{fallback_marker}")
            else:
                logger.debug(f" ⏭️ Skipping {readme_path} - no extractable content")

        # Clean up empty module_readmes list
        if not evidence["usage"]["module_readmes"]:
            del evidence["usage"]["module_readmes"]

    # TIER 2: Q&A insights (HIGH PRIORITY - User-identified topics)
    qa_insights = self._evidence_from_qa_history(context)
    if qa_insights:
        evidence["usage"]["qa_insights"] = qa_insights

    # TIER 2.5: Code docstrings (HIGH PRIORITY - Author's documented intent)
    docstrings = self._evidence_from_docstrings(context)
    if docstrings:
        evidence["usage"]["docstrings"] = docstrings

    # TIER 3: Code patterns from analysis (SUPPLEMENTARY)
    if context.searcher:
        evidence["implementation"]["code_patterns"] = self._evidence_from_code_search(context)

    # TIER 4: Rationale (WHY) evidence from decisions, logic explainer, and Q&A
    rationale = self._collect_rationale_evidence(context)
    if rationale:
        evidence["rationale"] = rationale

    logger.info("\n📊 Evidence collection complete:")
    logger.info(f" TIER 1 (Essentials): {len(evidence['essentials'])} items")
    logger.info(f" TIER 2 (Usage): {len(evidence['usage'])} items")
    logger.info(f" TIER 2.5 (Docstrings): {len(evidence['usage'].get('docstrings', []))} extracted")
    logger.info(f" TIER 3 (Implementation): {len(evidence['implementation'])} items")
    logger.info(
        " TIER 4 (Rationale): %s logic summaries, %s context decisions, %s QA insights",
        len(evidence["rationale"].get("logic", [])),
        len(evidence["rationale"].get("decisions", [])),
        len(evidence["rationale"].get("qa", [])),
    )

    context.rationale_records = evidence["rationale"]
    return evidence


def _evidence_from_primary_readme(self, evidence, readme_path, readme_essentials):
    """Copy the primary README's extracted data into the essentials/usage tiers."""
    essentials = evidence["essentials"]
    usage = evidence["usage"]

    # Installation
    install_info = readme_essentials.get('installation')
    if install_info:
        essentials["installation"] = {
            "command": install_info.command,
            "requirements": install_info.requirements,
            "source": readme_path,
        }
        logger.info(f" ✅ Installation: {install_info.command}")

    # Quickstart - EXACT code
    quickstart = readme_essentials.get('quickstart')
    if quickstart:
        essentials["quickstart"] = {
            "code": quickstart.code,
            "language": quickstart.language,
            "source": readme_path,
        }
        logger.info(f" ✅ Quickstart: {len(quickstart.code)} chars of {quickstart.language}")

    # Authentication
    auth = readme_essentials.get('authentication')
    if auth:
        essentials["authentication"] = {
            "code": auth.code,
            "language": auth.language,
            "source": readme_path,
        }
        logger.info(f" ✅ Authentication: {len(auth.code)} chars")

    # All code examples for usage tier
    examples = readme_essentials.get('examples')
    if examples:
        usage["readme_examples"] = [
            {
                "code": ex.code,
                "language": ex.language,
                "context": ex.context,
            }
            for ex in examples
        ]
        logger.info(f" ✅ Examples: {len(examples)} found")

    # Overview/description
    overview = readme_essentials.get('overview')
    if overview:
        essentials["overview"] = overview
        logger.info(f" ✅ Overview: {len(overview)} chars")

    # Feature list
    features = readme_essentials.get('features')
    if features:
        essentials["features"] = {
            "items": features.items,
            "section": features.section,
            "source": readme_path,
        }
        logger.info(f" ✅ Features: {len(features.items)} items")

    # Tables (compatibility matrices, status tables)
    tables = readme_essentials.get('tables')
    if tables:
        usage["tables"] = [
            {
                "raw_content": t.raw_content,
                "headers": t.headers,
                "row_count": t.row_count,
                "section": t.section,
                "has_status_indicators": t.has_status_indicators,
                "source": readme_path,
            }
            for t in tables
        ]
        status_tables = sum(1 for t in tables if t.has_status_indicators)
        logger.info(f" ✅ Tables: {len(tables)} found ({status_tables} with status indicators)")

    # Callouts (coming soon, experimental, warnings)
    callouts = readme_essentials.get('callouts')
    if callouts:
        usage["callouts"] = [
            {
                "text": c.text,
                "callout_type": c.callout_type,
                "context": c.context,
                "source": readme_path,
            }
            for c in callouts
        ]
        # Sorted so the log line is deterministic; str() tolerates
        # non-string type markers.
        callout_types = sorted({str(c.callout_type) for c in callouts})
        logger.info(f" ✅ Callouts: {len(callouts)} found (types: {', '.join(callout_types)})")


def _evidence_from_module_readme(self, readme_path, depth, readme_content, readme_essentials):
    """Build the usage-tier entry for a non-primary README.

    Structured extraction is preferred. When it yields nothing, the first
    non-heading paragraph (up to 1000 chars) and up to three fenced code
    blocks (500 chars each) are used as a raw fallback.
    """
    import re  # only needed for the raw fallback; imported once per call

    module_data = {
        'path': readme_path,
        'depth': depth,
        'overview': None,
        'examples': [],
        'features': None,
        'tables': [],
    }

    # Try structured extraction first
    overview = readme_essentials.get('overview')
    if overview:
        module_data['overview'] = overview

    examples = readme_essentials.get('examples')
    if examples:
        module_data['examples'] = [
            {
                "code": ex.code,
                "language": ex.language,
                "context": ex.context,
            }
            for ex in examples
        ]

    features = readme_essentials.get('features')
    if features:
        module_data['features'] = {
            "items": features.items,
            "section": features.section,
        }

    tables = readme_essentials.get('tables')
    if tables:
        module_data['tables'] = [
            {
                "raw_content": t.raw_content,
                "headers": t.headers,
                "section": t.section,
            }
            for t in tables
        ]

    if overview or examples or features or tables:
        return module_data

    # RAW-OVERVIEW FALLBACK for module READMEs only
    # Only applied when no structured data was extracted
    logger.info(f" ℹ️ Using raw-overview fallback for {readme_path}")

    # Grab first paragraph (up to 1000 chars), skipping headers and blanks
    for para in readme_content.split('\n\n'):
        para = para.strip()
        if para and not para.startswith('#'):
            module_data['overview'] = para[:1000]
            module_data['raw_fallback'] = True
            break

    # Also grab any code blocks as examples
    code_block_pattern = r'```(\w*)\n(.*?)```'
    code_blocks = re.findall(code_block_pattern, readme_content, re.DOTALL)
    for lang, code in code_blocks[:3]:  # Limit to 3 code blocks
        module_data['examples'].append({
            "code": code.strip()[:500],
            "language": lang or "text",
            "context": f"From {readme_path}",
        })

    return module_data


def _evidence_from_qa_history(self, context):
    """Return the Q&A entries with confidence >= 0.5 (empty list if none)."""
    if not context.qa_history:
        return []

    logger.info("💬 Collecting Q&A insights...")
    qa_insights = []
    for qa in context.qa_history:
        # `or 0` treats a missing or None confidence as zero instead of
        # raising on the comparison.
        confidence = qa.get("confidence") or 0
        if confidence >= 0.5:
            qa_insights.append({
                "question": qa["question"],
                "answer": qa["answer"],
                "confidence": confidence,
                "context_used": qa.get("context_used", False),
            })

    if qa_insights:
        high_conf_count = sum(1 for qa in qa_insights if qa["confidence"] >= 0.7)
        logger.info(f" ✅ Q&A insights: {len(qa_insights)} answers ({high_conf_count} high-confidence)")
    return qa_insights


def _evidence_from_docstrings(self, context):
    """Return up to 20 of the longest docstrings found in the indexed chunks."""
    if not (context.indexer and hasattr(context.indexer, 'last_indexed_chunks')):
        return []

    logger.info("📝 Extracting docstrings from code...")
    docstrings = []

    for chunk in context.indexer.last_indexed_chunks or []:
        # Chunks may be plain dicts or chunk objects; the name is stored
        # under a different key in each representation.
        if isinstance(chunk, dict):
            read = chunk.get
            name_key = 'chunk_name'
        else:
            def read(key, default=None, _chunk=chunk):
                return getattr(_chunk, key, default)
            name_key = 'name'

        docstring = read('docstring')
        if not docstring or len(docstring.strip()) <= 20:  # Skip trivial docstrings
            continue

        docstrings.append({
            "source": read('file_path', 'unknown'),
            "chunk_type": read('chunk_type', 'code'),
            "name": read(name_key, 'unnamed'),
            "docstring": docstring.strip(),
            "start_line": read('start_line', 0),
            "end_line": read('end_line', 0),
        })

    # Sort by docstring length (longer = more informative) and limit
    docstrings.sort(key=lambda d: len(d["docstring"]), reverse=True)
    docstrings = docstrings[:20]  # Keep top 20 most informative

    if docstrings:
        logger.info(f" ✅ Docstrings: {len(docstrings)} extracted from code")
    return docstrings


def _evidence_from_code_search(self, context):
    """Search the index for each derived term and return the matching snippets."""
    logger.info("🔍 Searching code for implementation patterns...")
    terms = self._derive_search_terms(context, max_terms=15)
    code_results = []

    for term in terms[:15]:
        try:
            results = context.searcher.search(term, limit=8)
            for r in results:
                code_results.append({
                    "source": f"{r.file_path}:{r.start_line}-{r.end_line}",
                    "snippet": (r.content or "").strip()[:600],
                })
        except Exception as exc:
            # Best-effort tier: one failing term must not stop the others,
            # but the failure should be visible rather than silently lost.
            logger.warning("Code search failed for term %r: %s", term, exc)

    logger.info(f" ✅ Found {len(code_results)} code patterns")
    return code_results
|
|
663
|
+
|
|
664
|
+
def _run_editor_pass(
    self,
    context: ModeContext,
    structured_evidence: Dict[str, Any],
    synthesized_docs: Dict[str, str],
) -> None:
    """Send synthesized drafts through the editor agent and keep its revisions.

    The pass is skipped (with an info log) when ``context.editor_agent`` is
    falsy or when ``synthesized_docs`` holds no non-blank entry whose name
    starts with ``SYNTHESIZED_``.

    Args:
        context: Mode context; ``editor_agent`` and ``repository_path`` are
            read, ``editor_reviews`` and ``editor_outputs`` are written.
        structured_evidence: Evidence forwarded unchanged to the editor agent.
        synthesized_docs: Draft documents keyed by filename. Mutated in place:
            each revision is added under ``editor/<section>`` next to the
            original draft, which is left untouched.
    """
    if not context.editor_agent:
        logger.info("Editor agent unavailable; skipping editor pass")
        return

    # Only non-blank synthesized drafts are worth reviewing.
    drafts: Dict[str, str] = {}
    for doc_name, doc_text in synthesized_docs.items():
        if doc_name.startswith("SYNTHESIZED_") and doc_text.strip():
            drafts[doc_name] = doc_text

    if not drafts:
        logger.info("No synthesized sections found for editor pass")
        return

    logger.info("Editor agent reviewing %d sections", len(drafts))
    reviews = context.editor_agent.review_sections(
        drafts,
        structured_evidence=structured_evidence,
        repository_path=context.repository_path,
    )
    context.editor_reviews = reviews

    # Store each revision both on the context and in the caller's document map.
    for review in reviews:
        revised_name = f"editor/{review.section}"
        context.editor_outputs[revised_name] = review.revised_text
        synthesized_docs[revised_name] = review.revised_text

    logger.info("Editor pass completed with %d revised sections", len(reviews))
|
|
700
|
+
|
|
701
|
+
def _get_readme_content(self, context: ModeContext) -> Optional[str]:
|
|
702
|
+
"""
|
|
703
|
+
Legacy method: Get only root README content.
|
|
704
|
+
Used when --single-readme flag is set or for backward compatibility.
|
|
705
|
+
|
|
706
|
+
Returns:
|
|
707
|
+
README markdown content or None
|
|
708
|
+
"""
|
|
709
|
+
# Try to get from context manager first
|
|
710
|
+
if context.context_manager:
|
|
711
|
+
for item in context.context_manager.get_all_context():
|
|
712
|
+
if 'readme' in item.source.lower():
|
|
713
|
+
return item.content
|
|
714
|
+
|
|
715
|
+
# Try to read from repository directly
|
|
716
|
+
repo_path = Path(context.repository_path)
|
|
717
|
+
for readme_name in ['README.md', 'readme.md', 'Readme.md', 'README.rst']:
|
|
718
|
+
readme_path = repo_path / readme_name
|
|
719
|
+
if readme_path.exists():
|
|
720
|
+
try:
|
|
721
|
+
return readme_path.read_text()
|
|
722
|
+
except Exception as e:
|
|
723
|
+
logger.warning(f"Failed to read {readme_path}: {e}")
|
|
724
|
+
|
|
725
|
+
return None
|
|
726
|
+
|
|
727
|
+
def _get_readme_contents(self, context: ModeContext) -> List[Dict[str, Any]]:
    """
    Discover and return all README files in the repository.

    Multi-README discovery with priority tiers:
    - Root README (depth=0): Used for essentials (installation, quickstart)
    - Module READMEs (depth>0): Added to usage tier as supplementary evidence

    Limits applied during discovery: depth <= 4, 10K chars per README
    (longer files are truncated), 100K chars in total, and at most 15
    READMEs after sorting.

    Args:
        context: Mode context; ``repository_path`` is read, and
            ``single_readme_mode`` (if present and truthy) switches to the
            legacy root-README-only behaviour.

    Returns:
        List of README dicts with keys: path, content, depth, absolute_path,
        size_chars, truncated
    """
    readme_names = ['README.md', 'readme.md', 'Readme.md', 'README.rst']

    # Legacy mode: only return root README
    if getattr(context, 'single_readme_mode', False):
        readme_content = self._get_readme_content(context)
        if readme_content:
            repo_path = Path(context.repository_path)
            # Find actual README path (the content may have come from the
            # context manager, so the file is located separately).
            for readme_name in readme_names:
                readme_path = repo_path / readme_name
                if readme_path.exists():
                    return [{
                        'path': readme_name,
                        'content': readme_content,
                        'depth': 0,
                        'absolute_path': str(readme_path),
                        'size_chars': len(readme_content),
                        'truncated': False
                    }]
        return []

    # Multi-README discovery mode
    repo_path = Path(context.repository_path)
    readmes: List[Dict[str, Any]] = []
    total_chars = 0
    seen_paths: set = set()  # Track canonical paths to avoid duplicates from symlinks

    # Load .v2dignore patterns if present
    v2dignore_patterns = self._load_v2dignore(repo_path)

    # Discover all README files recursively
    # On case-insensitive filesystems (macOS), rglob may return the same file
    # multiple times with different case patterns. We normalize to lowercase
    # for deduplication.
    budget_exhausted = False
    for readme_pattern in readme_names:
        # Once the total budget is hit, stop discovery entirely. A bare
        # `break` in the inner loop would only end the current pattern and
        # the scan would resume with the next one.
        if budget_exhausted:
            break

        for readme_path in repo_path.rglob(readme_pattern):
            # Handle symlinks and normalize for case-insensitive deduplication
            try:
                canonical = readme_path.resolve()
                # Normalize to lowercase for case-insensitive filesystem deduplication
                canonical_lower = str(canonical).lower()
                if canonical_lower in seen_paths:
                    continue
                seen_paths.add(canonical_lower)
            except Exception:
                # Skip broken symlinks
                continue

            # Calculate depth
            try:
                relative = readme_path.relative_to(repo_path)
                depth = len(relative.parts) - 1
            except ValueError:
                continue

            # Skip if too deep (max 4 levels)
            if depth > 4:
                logger.debug(f"Skipping README too deep (depth={depth}): {readme_path}")
                continue

            # Skip if in ignored directory
            if self._should_ignore_readme(readme_path, repo_path, v2dignore_patterns):
                logger.debug(f"Skipping ignored README: {readme_path}")
                continue

            # Read content
            try:
                content = readme_path.read_text(encoding='utf-8')
            except Exception as e:
                logger.warning(f"Failed to read {readme_path}: {e}")
                continue

            # Validate quality
            if not self._is_valid_readme(content):
                logger.debug(f"Skipping low-quality README: {readme_path}")
                continue

            # Truncate if oversized (10K char limit per README)
            truncated = False
            if len(content) > 10_000:
                content = content[:10_000] + "\n\n[... truncated ...]"
                truncated = True
                logger.info(f"Truncated README > 10K chars: {readme_path}")

            # Check total budget (100K chars max)
            if total_chars + len(content) > 100_000:
                logger.warning(f"Reached 100K char total limit, stopping at {len(readmes)} READMEs")
                budget_exhausted = True
                break

            readmes.append({
                'path': str(relative),
                'content': content,
                'depth': depth,
                'absolute_path': str(readme_path),
                'size_chars': len(content),
                'truncated': truncated
            })
            total_chars += len(content)

    # Sort deterministically: depth, then directory priority, then alphabetical
    readmes.sort(key=lambda r: (
        r['depth'],
        self._calculate_directory_priority(r['path']),
        r['path']
    ))

    # Limit to 15 READMEs
    if len(readmes) > 15:
        logger.warning(f"Found {len(readmes)} READMEs, limiting to 15")
        readmes = readmes[:15]
        total_chars = sum(r['size_chars'] for r in readmes)

    # Log summary with token estimate
    est_tokens = int(total_chars / 1.5)  # Rough estimate: ~1.5 chars per token
    logger.info(f"📖 Discovered {len(readmes)} README files (max depth=4, limit=15)")
    logger.info(f"📊 Total README content: {total_chars:,} chars (~{est_tokens:,} tokens)")

    if est_tokens > 60_000:
        logger.warning(f"⚠️ README content is large ({est_tokens:,} tokens). May impact context budget.")

    for readme in readmes:
        truncated_marker = " [TRUNCATED]" if readme.get('truncated') else ""
        logger.info(f" • {readme['path']} (depth={readme['depth']}, {readme['size_chars']:,} chars{truncated_marker})")

    return readmes
|
|
862
|
+
|
|
863
|
+
def _load_v2dignore(self, repo_path: Path) -> List[str]:
|
|
864
|
+
"""
|
|
865
|
+
Load .v2dignore patterns from repository root.
|
|
866
|
+
|
|
867
|
+
Returns list of gitignore-style patterns.
|
|
868
|
+
"""
|
|
869
|
+
v2dignore_path = repo_path / '.v2dignore'
|
|
870
|
+
patterns: List[str] = []
|
|
871
|
+
|
|
872
|
+
if v2dignore_path.exists():
|
|
873
|
+
try:
|
|
874
|
+
content = v2dignore_path.read_text()
|
|
875
|
+
for line in content.splitlines():
|
|
876
|
+
line = line.strip()
|
|
877
|
+
# Skip empty lines and comments
|
|
878
|
+
if line and not line.startswith('#'):
|
|
879
|
+
patterns.append(line)
|
|
880
|
+
logger.info(f"Loaded {len(patterns)} patterns from .v2dignore")
|
|
881
|
+
except Exception as e:
|
|
882
|
+
logger.warning(f"Failed to read .v2dignore: {e}")
|
|
883
|
+
|
|
884
|
+
return patterns
|
|
885
|
+
|
|
886
|
+
def _should_ignore_readme(
|
|
887
|
+
self,
|
|
888
|
+
readme_path: Path,
|
|
889
|
+
repo_root: Path,
|
|
890
|
+
v2dignore_patterns: Optional[List[str]] = None
|
|
891
|
+
) -> bool:
|
|
892
|
+
"""
|
|
893
|
+
Check if README should be ignored based on directory patterns.
|
|
894
|
+
|
|
895
|
+
Args:
|
|
896
|
+
readme_path: Absolute path to README file
|
|
897
|
+
repo_root: Repository root path
|
|
898
|
+
v2dignore_patterns: Custom ignore patterns from .v2dignore
|
|
899
|
+
|
|
900
|
+
Returns:
|
|
901
|
+
True if README should be ignored
|
|
902
|
+
"""
|
|
903
|
+
try:
|
|
904
|
+
relative = readme_path.relative_to(repo_root)
|
|
905
|
+
path_str = str(relative)
|
|
906
|
+
except ValueError:
|
|
907
|
+
return True # Not under repo root
|
|
908
|
+
|
|
909
|
+
# Hardcoded ignore patterns
|
|
910
|
+
hardcoded_patterns = [
|
|
911
|
+
'node_modules/', 'venv/', '.venv/', 'env/',
|
|
912
|
+
'.git/', '.github/', '.gitlab/',
|
|
913
|
+
'__pycache__/', '.pytest_cache/', '.mypy_cache/',
|
|
914
|
+
'dist/', 'build/', 'target/',
|
|
915
|
+
'vendor/', 'third_party/',
|
|
916
|
+
'.tox/', '.nox/', '.eggs/',
|
|
917
|
+
'site-packages/',
|
|
918
|
+
]
|
|
919
|
+
|
|
920
|
+
for pattern in hardcoded_patterns:
|
|
921
|
+
if pattern in path_str:
|
|
922
|
+
return True
|
|
923
|
+
|
|
924
|
+
# Check .v2dignore patterns
|
|
925
|
+
if v2dignore_patterns:
|
|
926
|
+
import fnmatch
|
|
927
|
+
for pattern in v2dignore_patterns:
|
|
928
|
+
# Support both directory patterns and glob patterns
|
|
929
|
+
if pattern.endswith('/'):
|
|
930
|
+
# Directory pattern: match if path contains this directory
|
|
931
|
+
if pattern.rstrip('/') in path_str:
|
|
932
|
+
return True
|
|
933
|
+
else:
|
|
934
|
+
# Glob pattern: match against relative path
|
|
935
|
+
if fnmatch.fnmatch(path_str, pattern) or fnmatch.fnmatch(path_str, f"**/{pattern}"):
|
|
936
|
+
return True
|
|
937
|
+
|
|
938
|
+
return False
|
|
939
|
+
|
|
940
|
+
def _is_valid_readme(self, content: str) -> bool:
|
|
941
|
+
"""
|
|
942
|
+
Check if README content is substantial enough to include.
|
|
943
|
+
|
|
944
|
+
Filters out:
|
|
945
|
+
- Stub READMEs (< 200 chars)
|
|
946
|
+
- READMEs with excessive TODOs (likely incomplete)
|
|
947
|
+
|
|
948
|
+
Args:
|
|
949
|
+
content: README file content
|
|
950
|
+
|
|
951
|
+
Returns:
|
|
952
|
+
True if README is valid for inclusion
|
|
953
|
+
"""
|
|
954
|
+
# Too short (likely a stub)
|
|
955
|
+
if len(content) < 200:
|
|
956
|
+
return False
|
|
957
|
+
|
|
958
|
+
# Too many TODOs relative to size (likely incomplete/placeholder)
|
|
959
|
+
todo_count = content.upper().count('TODO')
|
|
960
|
+
if todo_count > 5 and len(content) < 1000:
|
|
961
|
+
logger.debug(f"Skipping README with {todo_count} TODOs and < 1000 chars (likely stub)")
|
|
962
|
+
return False
|
|
963
|
+
|
|
964
|
+
return True
|
|
965
|
+
|
|
966
|
+
def _calculate_directory_priority(self, path: str) -> int:
|
|
967
|
+
"""
|
|
968
|
+
Calculate priority score for README based on directory.
|
|
969
|
+
|
|
970
|
+
Lower score = higher priority.
|
|
971
|
+
Used for deterministic sorting when multiple READMEs at same depth.
|
|
972
|
+
|
|
973
|
+
Args:
|
|
974
|
+
path: Relative path to README
|
|
975
|
+
|
|
976
|
+
Returns:
|
|
977
|
+
Priority score (0=highest, 3=lowest)
|
|
978
|
+
"""
|
|
979
|
+
path_lower = path.lower()
|
|
980
|
+
|
|
981
|
+
# Highest priority: common source directories
|
|
982
|
+
if any(d in path_lower for d in ['src/', 'lib/', 'core/']):
|
|
983
|
+
return 0
|
|
984
|
+
|
|
985
|
+
# Medium priority: package/module directories
|
|
986
|
+
if any(d in path_lower for d in ['packages/', 'modules/', 'components/']):
|
|
987
|
+
return 1
|
|
988
|
+
|
|
989
|
+
# Lower priority: docs, examples, tests
|
|
990
|
+
if any(d in path_lower for d in ['docs/', 'examples/', 'tests/']):
|
|
991
|
+
return 2
|
|
992
|
+
|
|
993
|
+
# Default priority
|
|
994
|
+
return 3
|
|
995
|
+
|
|
996
|
+
def _collect_context_evidence(
|
|
997
|
+
self,
|
|
998
|
+
context: ModeContext,
|
|
999
|
+
limit: int = 5,
|
|
1000
|
+
structured: Optional[Dict[str, Any]] = None
|
|
1001
|
+
) -> List[Dict[str, str]]:
|
|
1002
|
+
"""
|
|
1003
|
+
Legacy method for backward compatibility.
|
|
1004
|
+
Converts structured evidence to flat list format.
|
|
1005
|
+
"""
|
|
1006
|
+
structured = structured or self._collect_structured_evidence(context)
|
|
1007
|
+
|
|
1008
|
+
# Flatten to old format (for now)
|
|
1009
|
+
evidence = []
|
|
1010
|
+
|
|
1011
|
+
# Add essentials first
|
|
1012
|
+
if structured["essentials"].get("installation"):
|
|
1013
|
+
inst = structured["essentials"]["installation"]
|
|
1014
|
+
evidence.append({
|
|
1015
|
+
"source": f"{inst['source']} (installation)",
|
|
1016
|
+
"snippet": f"Installation: {inst['command']}\nRequirements: {', '.join(inst.get('requirements', []))}"
|
|
1017
|
+
})
|
|
1018
|
+
|
|
1019
|
+
if structured["essentials"].get("quickstart"):
|
|
1020
|
+
qs = structured["essentials"]["quickstart"]
|
|
1021
|
+
evidence.append({
|
|
1022
|
+
"source": f"{qs['source']} (quickstart)",
|
|
1023
|
+
"snippet": f"```{qs['language']}\n{qs['code']}\n```"
|
|
1024
|
+
})
|
|
1025
|
+
|
|
1026
|
+
if structured["essentials"].get("authentication"):
|
|
1027
|
+
auth = structured["essentials"]["authentication"]
|
|
1028
|
+
evidence.append({
|
|
1029
|
+
"source": f"{auth['source']} (authentication)",
|
|
1030
|
+
"snippet": f"```{auth['language']}\n{auth['code']}\n```"
|
|
1031
|
+
})
|
|
1032
|
+
|
|
1033
|
+
# Add usage examples
|
|
1034
|
+
for ex in structured["usage"].get("readme_examples", [])[:3]:
|
|
1035
|
+
evidence.append({
|
|
1036
|
+
"source": f"README ({ex['context']})",
|
|
1037
|
+
"snippet": f"```{ex['language']}\n{ex['code'][:400]}\n```"
|
|
1038
|
+
})
|
|
1039
|
+
|
|
1040
|
+
# Add Q&A insights (TIER 2)
|
|
1041
|
+
for qa in structured["usage"].get("qa_insights", [])[:3]:
|
|
1042
|
+
evidence.append({
|
|
1043
|
+
"source": f"Q&A Session (confidence: {qa['confidence']:.0%})",
|
|
1044
|
+
"snippet": f"Q: {qa['question']}\n\nA: {qa['answer'][:400]}"
|
|
1045
|
+
})
|
|
1046
|
+
|
|
1047
|
+
# Add code patterns last (TIER 3)
|
|
1048
|
+
evidence.extend(structured["implementation"].get("code_patterns", [])[:3])
|
|
1049
|
+
|
|
1050
|
+
return evidence
|
|
1051
|
+
|
|
1052
|
+
def _collect_rationale_evidence(self, context: ModeContext) -> Dict[str, Any]:
|
|
1053
|
+
"""Gather rationale-focused evidence (decisions, logic summaries, QA insights)."""
|
|
1054
|
+
|
|
1055
|
+
rationale: Dict[str, Any] = {
|
|
1056
|
+
"logic": [],
|
|
1057
|
+
"decisions": [],
|
|
1058
|
+
"qa": []
|
|
1059
|
+
}
|
|
1060
|
+
rationale_errors: List[str] = []
|
|
1061
|
+
index_payloads: List[Dict[str, Any]] = []
|
|
1062
|
+
|
|
1063
|
+
decision_items: List[ContextItem] = []
|
|
1064
|
+
|
|
1065
|
+
if context.context_manager:
|
|
1066
|
+
for ctx_type in ("decisions", "design_docs", "requirements"):
|
|
1067
|
+
try:
|
|
1068
|
+
items = context.context_manager.get_all_context(ctx_type)
|
|
1069
|
+
except Exception as exc: # pragma: no cover - defensive
|
|
1070
|
+
logger.warning("Failed to read %s context: %s", ctx_type, exc)
|
|
1071
|
+
rationale_errors.append(f"Context load failed for {ctx_type}: {exc}")
|
|
1072
|
+
items = []
|
|
1073
|
+
for item in items:
|
|
1074
|
+
decision_items.append(item)
|
|
1075
|
+
rationale["decisions"].append({
|
|
1076
|
+
"id": item.id,
|
|
1077
|
+
"type": ctx_type,
|
|
1078
|
+
"summary": item.get_summary(),
|
|
1079
|
+
"source": item.source,
|
|
1080
|
+
})
|
|
1081
|
+
decision_text = (item.content or "")[:2000]
|
|
1082
|
+
index_payloads.append({
|
|
1083
|
+
"record_id": f"decision::{item.id}",
|
|
1084
|
+
"record_type": f"rationale_decision::{ctx_type}",
|
|
1085
|
+
"text": decision_text or item.get_summary(),
|
|
1086
|
+
"source_path": item.source or "",
|
|
1087
|
+
"title": item.get_summary(),
|
|
1088
|
+
})
|
|
1089
|
+
|
|
1090
|
+
logic_summaries: List[Dict[str, Any]] = []
|
|
1091
|
+
if (
|
|
1092
|
+
context.logic_explainer
|
|
1093
|
+
and context.indexer
|
|
1094
|
+
and getattr(context.indexer, "last_indexed_chunks", None)
|
|
1095
|
+
):
|
|
1096
|
+
try:
|
|
1097
|
+
logic_summaries = context.logic_explainer.summarize_chunks(
|
|
1098
|
+
context.indexer.last_indexed_chunks,
|
|
1099
|
+
decision_items,
|
|
1100
|
+
)
|
|
1101
|
+
except Exception as exc: # pragma: no cover - defensive
|
|
1102
|
+
logger.warning("LogicExplainer summarization failed: %s", exc)
|
|
1103
|
+
rationale_errors.append(f"LogicExplainer failed: {exc}")
|
|
1104
|
+
|
|
1105
|
+
if logic_summaries:
|
|
1106
|
+
context.logic_summaries = logic_summaries
|
|
1107
|
+
rationale["logic"] = logic_summaries
|
|
1108
|
+
for summary in logic_summaries:
|
|
1109
|
+
text_parts = [
|
|
1110
|
+
f"Summary: {summary.get('summary', '')}",
|
|
1111
|
+
f"Why: {summary.get('rationale', '')}",
|
|
1112
|
+
]
|
|
1113
|
+
if summary.get("trade_offs"):
|
|
1114
|
+
text_parts.append(f"Trade-offs: {', '.join(summary['trade_offs'])}")
|
|
1115
|
+
if summary.get("side_effects"):
|
|
1116
|
+
text_parts.append(f"Side-effects: {', '.join(summary['side_effects'])}")
|
|
1117
|
+
index_payloads.append({
|
|
1118
|
+
"record_id": f"logic::{summary.get('unit_id')}",
|
|
1119
|
+
"record_type": "rationale_logic",
|
|
1120
|
+
"text": "\n".join([part for part in text_parts if part]),
|
|
1121
|
+
"source_path": summary.get("file_path", ""),
|
|
1122
|
+
"title": summary.get("unit_id", ""),
|
|
1123
|
+
})
|
|
1124
|
+
|
|
1125
|
+
qa_rationale: List[Dict[str, Any]] = []
|
|
1126
|
+
for qa in context.qa_history:
|
|
1127
|
+
rationale_points = qa.get("rationale_points") or []
|
|
1128
|
+
if qa.get("question_type") == "reasoning" or rationale_points:
|
|
1129
|
+
qa_rationale.append({
|
|
1130
|
+
"question": qa.get("question"),
|
|
1131
|
+
"answer": qa.get("answer"),
|
|
1132
|
+
"confidence": qa.get("confidence"),
|
|
1133
|
+
"rationale_points": rationale_points,
|
|
1134
|
+
})
|
|
1135
|
+
qa_id_seed = (qa.get("question") or "") + (qa.get("answer") or "")
|
|
1136
|
+
note_lines = [f"Question: {qa.get('question','')}", f"Answer: {qa.get('answer','')}"]
|
|
1137
|
+
if rationale_points:
|
|
1138
|
+
point_notes = [p.get("note") for p in rationale_points if isinstance(p, dict)]
|
|
1139
|
+
filtered = [p for p in point_notes if p]
|
|
1140
|
+
if filtered:
|
|
1141
|
+
note_lines.append("Notes: " + "; ".join(filtered))
|
|
1142
|
+
index_payloads.append({
|
|
1143
|
+
"record_id": f"qa::{hashlib.sha256(qa_id_seed.encode('utf-8')).hexdigest()[:32]}",
|
|
1144
|
+
"record_type": "rationale_qa",
|
|
1145
|
+
"text": "\n".join(note_lines),
|
|
1146
|
+
"title": qa.get("question", ""),
|
|
1147
|
+
})
|
|
1148
|
+
|
|
1149
|
+
if qa_rationale:
|
|
1150
|
+
rationale["qa"] = qa_rationale
|
|
1151
|
+
|
|
1152
|
+
if rationale_errors:
|
|
1153
|
+
rationale["errors"] = rationale_errors
|
|
1154
|
+
|
|
1155
|
+
if context.indexer and index_payloads:
|
|
1156
|
+
try:
|
|
1157
|
+
context.indexer.upsert_rationale_entries(index_payloads)
|
|
1158
|
+
except Exception as exc: # pragma: no cover - defensive
|
|
1159
|
+
logger.warning("Failed to index rationale entries: %s", exc)
|
|
1160
|
+
rationale.setdefault("errors", []).append(f"Indexing failed: {exc}")
|
|
1161
|
+
|
|
1162
|
+
return rationale
|
|
1163
|
+
|
|
1164
|
+
def _run_synthesis_overview(self, context: ModeContext) -> Dict[str, str]:
    """Run synthesis to produce comprehensive documentation files.

    Template resolution order:
      1. ``context.synthesis_template`` when set;
      2. ``templates/synthesis_universal.yaml`` located three directories
         above this file, when it exists and can be parsed;
      3. a minimal built-in template (Overview, Architecture, API).

    Args:
        context: Mode context; ``synthesis_template`` and ``user_focused``
            are read here, and the context is passed on to the evidence
            collectors.

    Returns:
        Mapping of ``SYNTHESIZED_<SECTION>.md`` filenames to generated
        content. Sections that came back empty are omitted.

    Raises:
        RuntimeError: If the synthesis module cannot be imported.
    """
    try:
        from .synthesis.llm_synthesizer import LLMSynthesizer
    except Exception as e:
        raise RuntimeError("Synthesis module not available. Ensure Phase 3 synthesis is installed.") from e

    # Use provided template, or load universal template as default
    template_spec = context.synthesis_template

    if not template_spec:
        # Load universal template as default
        from pathlib import Path

        # Find templates directory relative to this file
        templates_dir = Path(__file__).parent.parent.parent / "templates"
        universal_template_path = templates_dir / "synthesis_universal.yaml"

        if universal_template_path.exists():
            try:
                # Imported inside the try so that a missing PyYAML falls back
                # to the built-in template instead of aborting synthesis.
                import yaml

                with open(universal_template_path, 'r', encoding='utf-8') as f:
                    template_spec = yaml.safe_load(f)
                logger.info(f"📋 Using universal template (5 sections, ~7,700 words)")
            except Exception as e:
                logger.warning(f"Failed to load universal template: {e}")
                template_spec = None

    # Fallback to minimal hardcoded template if universal template fails
    if not template_spec:
        logger.info("📋 Using minimal fallback template (3 sections)")
        template_spec = {
            "sections": [
                {
                    "name": "Overview",
                    "instructions": "Explain WHAT the project does and WHY key decisions were made, citing evidence.",
                    "max_words": 650,
                    "rules": {
                        "require_citations": True,
                        "min_citations": 1,
                        "citation_style": "[CITE:source]",
                        "mark_inference": True,
                        "required_elements": ["Purpose", "Key Components"]
                    }
                },
                {
                    "name": "Architecture",
                    "instructions": "Describe the system architecture: core modules, data flow, and key dependencies. Cite code locations.",
                    "max_words": 600,
                    "rules": {
                        "require_citations": True,
                        "min_citations": 2,
                        "citation_style": "[CITE:source]",
                        "mark_inference": True,
                        "required_elements": ["Modules", "Data Flow"]
                    }
                },
                {
                    "name": "API",
                    "instructions": "Summarize main public interfaces or endpoints discovered in code. Include file:line citations.",
                    "max_words": 600,
                    "rules": {
                        "require_citations": True,
                        "min_citations": 2,
                        "citation_style": "[CITE:source]",
                        "mark_inference": True,
                        "required_elements": ["Interfaces"]
                    }
                }
            ]
        }

    # EVIDENCE-FIRST APPROACH: Collect structured evidence
    # This prioritizes README essentials over code internals
    structured_evidence = self._collect_structured_evidence(context)

    # Also collect legacy format for backward compatibility
    context_evidence = self._collect_context_evidence(context, limit=5, structured=structured_evidence)
    terms = self._derive_search_terms(context, max_terms=8)
    code_evidence = self._collect_code_evidence(context, terms=terms, per_term=2)

    logger.info("📊 Evidence collected:")
    if structured_evidence["essentials"]:
        logger.info(f" ✓ Essentials: {list(structured_evidence['essentials'].keys())}")
    logger.info(f" ✓ Usage examples: {len(structured_evidence['usage'].get('readme_examples', []))}")
    logger.info(f" ✓ Code patterns: {len(structured_evidence['implementation'].get('code_patterns', []))}")

    # Create synthesizer with user_focused flag
    synthesizer = LLMSynthesizer(user_focused=context.user_focused)
    sections = synthesizer.synthesize(
        template_spec=template_spec,
        code_evidence=code_evidence,
        context_evidence=context_evidence,
        structured_evidence=structured_evidence,  # NEW: Pass structured evidence
        system_prompt=None,  # Uses user-focused prompt if flag is set
    )

    outputs: Dict[str, str] = {}

    # Process ALL sections returned by the synthesizer dynamically
    for section_name, content in sections.items():
        if content:  # Only add non-empty sections
            # Convert section name to filename format
            filename = f"SYNTHESIZED_{section_name.upper().replace(' ', '_')}.md"
            outputs[filename] = content
            logger.info(f"✅ Generated synthesis for section: {section_name}")

    if not outputs:
        logger.warning("⚠️ No sections were synthesized - check template configuration")

    return outputs
|
|
1275
|
+
|
|
1276
|
+
def _collect_code_evidence(self, context: ModeContext, terms: Optional[List[str]] = None, per_term: int = 2) -> List[Dict[str, str]]:
|
|
1277
|
+
"""Collect top code snippets as evidence using search terms."""
|
|
1278
|
+
evidence: List[Dict[str, str]] = []
|
|
1279
|
+
if not context.searcher:
|
|
1280
|
+
return evidence
|
|
1281
|
+
if not terms:
|
|
1282
|
+
terms = ["api", "service", "auth", "config", "database", "client", "router", "endpoint"]
|
|
1283
|
+
used = set()
|
|
1284
|
+
for term in terms:
|
|
1285
|
+
try:
|
|
1286
|
+
results = context.searcher.search(term, limit=per_term)
|
|
1287
|
+
except Exception:
|
|
1288
|
+
results = []
|
|
1289
|
+
for r in results:
|
|
1290
|
+
key = (r.file_path, r.start_line, r.end_line)
|
|
1291
|
+
if key in used:
|
|
1292
|
+
continue
|
|
1293
|
+
used.add(key)
|
|
1294
|
+
src = f"{r.file_path}:{r.start_line}-{r.end_line}"
|
|
1295
|
+
snip = (r.content or "").strip()[:800]
|
|
1296
|
+
evidence.append({"source": src, "snippet": snip})
|
|
1297
|
+
return evidence
|
|
1298
|
+
|
|
1299
|
+
def _derive_search_terms(self, context: ModeContext, max_terms: int = 8) -> List[str]:
|
|
1300
|
+
"""Derive search terms from repo stats, structure, and recent questions."""
|
|
1301
|
+
terms: List[str] = []
|
|
1302
|
+
# From stats: chunk types
|
|
1303
|
+
try:
|
|
1304
|
+
chunk_types = list((context.indexer.stats or {}).get("chunks_by_type", {}).keys())
|
|
1305
|
+
for ct in chunk_types:
|
|
1306
|
+
if ct:
|
|
1307
|
+
terms.append(ct.lower())
|
|
1308
|
+
except Exception:
|
|
1309
|
+
pass
|
|
1310
|
+
# From repo structure: common dirs under repo/src
|
|
1311
|
+
try:
|
|
1312
|
+
repo = Path(context.repository_path)
|
|
1313
|
+
candidates = []
|
|
1314
|
+
for name in ["src", "app", "server", "backend", "api"]:
|
|
1315
|
+
p = repo / name
|
|
1316
|
+
if p.exists() and p.is_dir():
|
|
1317
|
+
candidates.extend([d.name for d in p.iterdir() if d.is_dir()])
|
|
1318
|
+
terms.extend([t for t in candidates if len(t) > 2])
|
|
1319
|
+
except Exception:
|
|
1320
|
+
pass
|
|
1321
|
+
# From Q&A history: keywords from last 5 questions
|
|
1322
|
+
try:
|
|
1323
|
+
for qa in (context.qa_history or [])[-5:]:
|
|
1324
|
+
q = (qa.get("question") or "").lower()
|
|
1325
|
+
for w in q.replace("?", " ").split():
|
|
1326
|
+
if len(w) > 3 and w.isalpha():
|
|
1327
|
+
terms.append(w)
|
|
1328
|
+
except Exception:
|
|
1329
|
+
pass
|
|
1330
|
+
# Seed/common terms
|
|
1331
|
+
seed = ["api", "route", "controller", "service", "client", "model", "database", "config", "auth", "main"]
|
|
1332
|
+
terms.extend(seed)
|
|
1333
|
+
# Deduplicate and limit
|
|
1334
|
+
seen = set()
|
|
1335
|
+
unique = []
|
|
1336
|
+
for t in terms:
|
|
1337
|
+
if t not in seen:
|
|
1338
|
+
seen.add(t)
|
|
1339
|
+
unique.append(t)
|
|
1340
|
+
return unique[:max_terms]
|
|
1341
|
+
|
|
1342
|
+
def _generate_adaptive_questions(self, context: ModeContext) -> List:
    """
    Generate adaptive question suggestions based on project type and template.

    Uses the adaptive question generator to provide relevant suggestions,
    in user-focused or technical mode depending on ``context.user_focused``.
    The first context item whose source mentions "readme" (case-insensitive)
    is passed along as README content.

    Args:
        context: The mode context with template and repository info

    Returns:
        List of GeneratedQuestion objects; empty when no synthesis template
        is set or when generation fails (a warning is logged).
    """
    from .question_generator import TemplateQuestionGenerator, QuestionFocus

    # Only generate if we have a synthesis template
    if not context.synthesis_template:
        return []

    # Get README content for better project type detection
    readme_content = None
    if context.context_manager:
        # Only the first README is used, so stop at the first match. An item
        # may carry no source; treat that as "not a README" rather than
        # raising AttributeError on None.
        readme_content = next(
            (
                item.content
                for item in context.context_manager.get_all_context()
                if "readme" in (item.source or "").lower()
            ),
            None,
        )

    # Generate adaptive questions with appropriate focus
    generator = TemplateQuestionGenerator()

    # Choose focus mode based on user_focused flag
    focus = QuestionFocus.USER_FOCUSED if context.user_focused else QuestionFocus.TECHNICAL

    try:
        questions = generator.generate_questions_from_template(
            template_spec=context.synthesis_template,
            repository_path=context.repository_path,
            readme_content=readme_content,
            max_questions=10,
            focus=focus
        )
        return questions
    except Exception as e:
        logger.warning(f"Could not generate adaptive questions: {e}")
        return []
|
|
1390
|
+
|
|
1391
|
+
def _suggest_next_question(self, context: ModeContext) -> Optional[str]:
    """
    Propose a follow-up question given the Q&A asked so far.

    Delegates to ``suggest_next_question`` from the question generator
    module, passing the history as ``(question, answer)`` pairs and leaving
    the phase unset.

    Args:
        context: The mode context with Q&A history

    Returns:
        Suggested question text, or None when there is no synthesis
        template, no Q&A history, or the generator raises (a warning is
        logged in that case).
    """
    from .question_generator import suggest_next_question

    if not (context.synthesis_template and context.qa_history):
        return None

    # The generator expects (question, answer) tuples rather than dicts.
    asked_pairs = []
    for entry in context.qa_history:
        asked_pairs.append((entry.get("question", ""), entry.get("answer", "")))

    try:
        return suggest_next_question(
            qa_history=asked_pairs,
            template_spec=context.synthesis_template,
            current_phase=None  # Auto-detect based on history length
        )
    except Exception as e:
        logger.warning(f"Could not generate next question suggestion: {e}")
        return None
|
|
1425
|
+
|
|
1426
|
+
async def _run_interactive_session(self, context: ModeContext):
    """
    Run the interactive Q&A session with adaptive question suggestions.

    Prints a banner, a summary of any loaded context and up to five
    suggested starter questions, then reads questions from stdin until the
    user types 'generate docs'. Each answered question is appended to
    ``context.qa_history``; after every third answer (when a synthesis
    template is set) an adaptive next-question suggestion is shown.

    Args:
        context: The mode context

    Note:
        'exit', Ctrl-C and end-of-input (EOF on stdin) all terminate the
        process through ``sys.exit(0)`` without generating documentation.
        Any other exception raised while handling a question is printed,
        logged, and the prompt is shown again.
    """
    # Bound locally so the exit paths below never depend on a module-level
    # import that may or may not exist.
    import sys

    # Built once instead of on every iteration of the suggestion loop.
    priority_icons = {
        "critical": "🔴",
        "high": "🟡",
        "medium": "🟢",
        "low": "⚪",
    }

    print("\n" + "="*60)
    print("🚀 Interactive Documentation Explorer")
    print("="*60)

    # Show context summary
    if context.context_manager and context.context_manager.total_items > 0:
        summary = context.context_manager.get_summary()
        print(f"\n📚 Loaded Context: {summary['total_items']} items")
        for ctx_type, info in summary.get('by_type', {}).items():
            print(f" • {ctx_type}: {info['count']} items")

    # Generate adaptive question suggestions
    suggested_questions = self._generate_adaptive_questions(context)

    if suggested_questions:
        print("\n💡 Suggested questions based on your project:")
        for i, suggested in enumerate(suggested_questions[:5], 1):
            priority_icon = priority_icons.get(suggested.priority.value, "❓")
            print(f" {i}. {priority_icon} {suggested.text}")
        print(" (You can ask these or any other questions)")

    print("\n✨ Let's explore your codebase!")
    print("Commands:")
    print(" • Ask any question about the code")
    print(" • Type 'add context' to add more context")
    print(" • Type 'generate docs' to create documentation")
    print(" • Type 'help' for more commands")
    print("")

    while not context.exploration_complete:
        try:
            question = input("\n❓ > ").strip()

            if not question:
                continue

            command = question.lower()

            # Handle special commands
            if command == 'generate docs':
                context.exploration_complete = True
                print("\n📝 Generating documentation from exploration...")

                # Show query interpretation metrics
                if context.explorer and hasattr(context.explorer, 'print_metrics_report'):
                    context.explorer.print_metrics_report()

                break

            elif command == 'add context':
                if context.explorer.add_context_interactive():
                    print("✅ Context added successfully")
                continue

            elif command == 'help':
                self._show_help()
                continue

            elif command == 'exit':
                print("👋 Exiting without generating documentation")
                sys.exit(0)

            # Ask the question
            result = context.explorer.ask(question)

            # Display the answer
            print(f"\n💡 {result.format_with_context()}")

            # Show follow-up suggestions
            if result.follow_up_suggestions:
                print("\n🔍 You might also want to ask:")
                for suggestion in result.follow_up_suggestions[:3]:
                    print(f" • {suggestion}")

            # Store in Q&A history
            question_type = getattr(result, "question_type", None)
            context.qa_history.append({
                "question": question,
                "answer": result.answer,
                "context_used": len(result.context_items) > 0,
                "code_results": len(result.code_results),
                "confidence": result.confidence,
                "question_type": question_type.name.lower() if question_type else None,
                "rationale_points": getattr(result, "rationale_points", [])
            })

            # Show adaptive next question suggestion every few questions
            if len(context.qa_history) % 3 == 0 and context.synthesis_template:
                next_suggestion = self._suggest_next_question(context)
                if next_suggestion:
                    print(f"\n💭 Suggested next question: {next_suggestion}")

        except EOFError:
            # stdin was closed or piped input ran out. Every further input()
            # call would raise again, so falling through to the generic
            # handler below would spin forever printing errors.
            print("\n\n👋 End of input. Exiting...")
            sys.exit(0)
        except KeyboardInterrupt:
            print("\n\n👋 Interrupted. Exiting...")
            sys.exit(0)
        except Exception as e:
            # Keep the session alive on per-question failures.
            print(f"\n❌ Error: {e}")
            logger.exception("Error in interactive session")
|
|
1535
|
+
|
|
1536
|
+
def _show_help(self):
    """Print the interactive-mode help: example questions, then commands."""
    help_lines = (
        "\n📖 Interactive Mode Help",
        "="*40,
        "Questions you can ask:",
        " • What are the main components?",
        " • How does authentication work?",
        " • Why do we use [technology]?",
        " • Show me the API endpoints",
        " • What does [class/function] do?",
        "",
        "Commands:",
        " • add context - Add requirements, tickets, or docs",
        " • generate docs - Create documentation from exploration",
        " • help - Show this help",
        " • exit - Exit without generating",
    )
    # One write produces the same stdout text as printing line by line.
    print("\n".join(help_lines))
|
|
1552
|
+
|
|
1553
|
+
# Documentation generation methods (simplified for now)
|
|
1554
|
+
|
|
1555
|
+
def _generate_auto_readme(self, context: ModeContext) -> str:
    """Generate README for AUTO mode.

    The document is a fixed template; the only dynamic part is the title,
    taken from the final path component of ``context.repository_path``.
    """
    repo_name = Path(context.repository_path).name
    readme_lines = [
        f"# {repo_name}",
        "",
        "## Overview",
        "*Generated from code analysis*",
        "",
        "This documentation was automatically generated from code analysis.",
        "",
        "## Structure",
        "Repository contains code files that were analyzed using:",
        "- AST-based semantic chunking",
        "- Vector embeddings for similarity",
        "- BM25 keyword search",
        "",
        "## Note",
        "This is AUTO mode documentation - based purely on code analysis without external context.",
        "For richer documentation including business context, use INTERACTIVE or HYBRID modes.",
        "",  # trailing newline at end of document
    ]
    return "\n".join(readme_lines)
|
|
1574
|
+
|
|
1575
|
+
def _generate_hybrid_readme(self, context: ModeContext) -> str:
    """Generate README for HYBRID mode.

    Fills a fixed template with the repository name, the number of loaded
    context items and the formatted list of context sources. An empty
    summary is used when no context manager is attached.
    """
    if context.context_manager:
        context_summary = context.context_manager.get_summary()
    else:
        context_summary = {}

    repo_name = Path(context.repository_path).name
    item_total = context_summary.get('total_items', 0)
    sources_block = self._format_context_sources(context_summary)

    readme_lines = [
        f"# {repo_name}",
        "",
        "## Overview",
        "*Generated from code analysis + external context*",
        "",
        "This documentation was generated using HYBRID mode, combining:",
        "- Code analysis (AST, embeddings, search)",
        f"- External context ({item_total} items)",
        "",
        "## Context Sources",
        f"{sources_block}",
        "",
        "## Note",
        "This documentation includes external context but was generated without interactive exploration.",
        "For documentation that captures specific insights, use INTERACTIVE mode.",
        "",  # trailing newline at end of document
    ]
    return "\n".join(readme_lines)
|
|
1595
|
+
|
|
1596
|
+
def _generate_interactive_readme(self, context: ModeContext) -> str:
    """Generate README for INTERACTIVE mode.

    Fills a fixed template with the repository name, the number of
    questions asked, the number of context items, and the formatted
    insights for the first five Q&A entries.
    """
    qa_count = len(context.qa_history)
    if context.context_manager:
        context_summary = context.context_manager.get_summary()
    else:
        context_summary = {}

    repo_name = Path(context.repository_path).name
    item_total = context_summary.get('total_items', 0)
    # Only the first five exchanges are surfaced in the README.
    insights_block = self._format_qa_insights(context.qa_history[:5])

    readme_lines = [
        f"# {repo_name}",
        "",
        "## Overview",
        "*Generated from interactive exploration with Q&A*",
        "",
        "This documentation was created through INTERACTIVE exploration:",
        f"- {qa_count} questions explored",
        f"- {item_total} context items used",
        "- Code + Context + Human insights combined",
        "",
        "## Key Insights from Exploration",
        f"{insights_block}",
        "",
        "## Note",
        "This documentation reflects the understanding gained through interactive Q&A.",
        "It combines code analysis, external context, and human-guided exploration.",
        "",  # trailing newline at end of document
    ]
    return "\n".join(readme_lines)
|
|
1618
|
+
|
|
1619
|
+
def _generate_auto_architecture(self, context: ModeContext) -> str:
    """Return the placeholder architecture document for AUTO mode.

    ``context`` is accepted for interface parity but not used yet.
    """
    paragraphs = (
        "# Architecture",
        "*Generated from code structure analysis*",
        "TODO: Integrate with existing pipeline",
    )
    return "\n\n".join(paragraphs)
|
|
1622
|
+
|
|
1623
|
+
def _generate_hybrid_architecture(self, context: ModeContext) -> str:
    """Return the placeholder architecture document for HYBRID mode.

    ``context`` is accepted for interface parity but not used yet.
    """
    paragraphs = (
        "# Architecture",
        "*Generated from code + context*",
        "TODO: Integrate with context-aware generation",
    )
    return "\n\n".join(paragraphs)
|
|
1626
|
+
|
|
1627
|
+
def _generate_interactive_architecture(self, context: ModeContext) -> str:
    """Return the placeholder architecture document for INTERACTIVE mode.

    ``context`` is accepted for interface parity but not used yet.
    """
    paragraphs = (
        "# Architecture",
        "*Generated from exploration insights*",
        "TODO: Build from Q&A insights",
    )
    return "\n\n".join(paragraphs)
|
|
1630
|
+
|
|
1631
|
+
def _generate_auto_api(self, context: ModeContext) -> str:
    """Return the placeholder API reference for AUTO mode.

    ``context`` is accepted for interface parity but not used yet.
    """
    paragraphs = (
        "# API Reference",
        "*Generated from code analysis*",
        "TODO: Extract from AST",
    )
    return "\n\n".join(paragraphs)
|
|
1634
|
+
|
|
1635
|
+
def _generate_hybrid_api(self, context: ModeContext) -> str:
    """Return the placeholder API reference for HYBRID mode.

    ``context`` is accepted for interface parity but not used yet.
    """
    paragraphs = (
        "# API Reference",
        "*Generated with context*",
        "TODO: Add context citations",
    )
    return "\n\n".join(paragraphs)
|
|
1638
|
+
|
|
1639
|
+
def _generate_interactive_api(self, context: ModeContext) -> str:
    """Return the placeholder API reference for INTERACTIVE mode.

    ``context`` is accepted for interface parity but not used yet.
    """
    paragraphs = (
        "# API Reference",
        "*Generated from exploration*",
        "TODO: Include Q&A insights",
    )
    return "\n\n".join(paragraphs)
|
|
1642
|
+
|
|
1643
|
+
def _generate_requirements_tracing(self, context: ModeContext) -> str:
    """Generate requirements tracing document.

    Returns a short "no context" notice when the context has no context
    manager; otherwise lists the loaded context sources above a
    placeholder traceability matrix.
    """
    manager = context.context_manager
    if not manager:
        return "# Requirements Tracing\n\nNo requirements context available."

    sources_block = self._format_context_sources(manager.get_summary())
    document_lines = [
        "# Requirements Tracing",
        "",
        "## Context Sources",
        f"{sources_block}",
        "",
        "## Traceability Matrix",
        "TODO: Map requirements to implementation",
        "",  # trailing newline at end of document
    ]
    return "\n".join(document_lines)
|
|
1656
|
+
|
|
1657
|
+
def _generate_exploration_insights(self, context: ModeContext) -> str:
    """Generate insights from Q&A exploration.

    Reports the total number of questions in ``context.qa_history``, the
    formatted per-question findings, how many answers had a confidence
    above 0.7, and how many were marked as using context.

    Args:
        context: The mode context whose ``qa_history`` is summarised.

    Returns:
        The insights document as Markdown text.
    """
    history = context.qa_history

    # A history entry may store confidence=None. The key is present, so a
    # .get() default would not apply and `None > 0.7` raises TypeError.
    # Count such entries as zero confidence instead.
    high_confidence = sum(
        1 for q in history if (q.get('confidence') or 0) > 0.7
    )
    with_context = sum(1 for q in history if q.get('context_used', False))

    return f"""# Exploration Insights

## Questions Explored
Total questions: {len(history)}

## Key Findings
{self._format_qa_insights(history)}

## Confidence Levels
High confidence answers: {high_confidence}
With context support: {with_context}
"""
|
|
1671
|
+
|
|
1672
|
+
def _format_context_sources(self, summary: Dict) -> str:
    """Render the context summary as a Markdown bullet list.

    Emits one bullet per entry under ``summary['by_type']`` with its item
    count, followed by at most two of that entry's ``items`` as
    sub-bullets. Returns "No context loaded" when there is nothing to list.
    """
    rendered = []
    by_type = summary.get('by_type', {})
    for type_name, details in by_type.items():
        rendered.append(f"- **{type_name}**: {details['count']} items")
        # Only the first two items of each type are shown as examples.
        sample_items = details.get('items', [])[:2]
        rendered.extend(f" - {entry}" for entry in sample_items)

    if not rendered:
        return "No context loaded"
    return "\n".join(rendered)
|
|
1680
|
+
|
|
1681
|
+
def _format_qa_insights(self, qa_history: List[Dict]) -> str:
    """Format Q&A insights for documentation.

    Produces a numbered Markdown list of at most the first ten history
    entries, each followed by its confidence as a percentage.

    Args:
        qa_history: Q&A entries; each is read for its 'question' and
            'confidence' keys. Missing keys and a None confidence are
            tolerated.

    Returns:
        The formatted list, or "No Q&A session conducted" when the
        history is empty or None.
    """
    if not qa_history:
        return "No Q&A session conducted"

    lines = []
    for i, qa in enumerate(qa_history[:10], 1):
        # Tolerate incomplete entries: a missing question or a None
        # confidence must not abort generation of the whole document.
        question_text = qa.get('question', '')
        confidence = qa.get('confidence') or 0
        lines.append(f"{i}. **Q:** {question_text}")
        lines.append(f" **Confidence:** {confidence:.1%}")
    return "\n".join(lines)
|
|
1688
|
+
|
|
1689
|
+
|
|
1690
|
+
async def run_documentation_mode(
    repository_path: str,
    output_path: str = "./docs",
    interactive: bool = False,
    context_files: Optional[List[str]] = None,
    jira_config: Optional[Dict] = None,
    synthesis: bool = True,
    synthesis_template: Optional[Dict[str, Any]] = None,
    user_focused: bool = False,
    editor_enabled: Optional[bool] = None,
    single_readme_mode: bool = False,
) -> ModeContext:
    """
    Main entry point for documentation generation.

    Creates a ``DocumentationModeHandler``, initializes the mode context,
    runs generation, and writes every generated document below
    ``output_path`` (creating directories as needed).

    Args:
        repository_path: Path to repository
        output_path: Output directory
        interactive: Enable Q&A session (default: False for automatic)
        context_files: External context files
        jira_config: Jira configuration
        synthesis: Enable synthesis (deprecated, always True)
        synthesis_template: Custom template
        user_focused: Use user-focused prompts
        editor_enabled: Enable two-pass editor workflow (overrides V2D_ENABLE_EDITOR env)
        single_readme_mode: Use only root README (legacy behavior). Default: False (multi-README)

    Returns:
        ModeContext populated with generated documentation and editor outputs
    """
    # Create handler
    handler = DocumentationModeHandler()

    # Initialize
    context = await handler.initialize_mode(
        repository_path=repository_path,
        output_path=output_path,
        interactive=interactive,
        context_files=context_files,
        jira_config=jira_config,
        synthesis=synthesis,
        synthesis_template=synthesis_template,
        user_focused=user_focused,
        editor_enabled=editor_enabled,
        single_readme_mode=single_readme_mode,
    )

    # Generate docs
    docs = await handler.run_generate_mode(context)

    # Save documentation
    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)

    for filename, content in docs.items():
        file_path = output_dir / filename
        # Filenames may contain sub-directories (e.g. per-package READMEs).
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Write UTF-8 explicitly: the locale default encoding can fail on,
        # or mis-encode, non-ASCII characters in generated documentation.
        file_path.write_text(content, encoding="utf-8")
        logger.info(f"Saved {filename}")

    context.generated_docs = docs

    print(f"\n✅ Documentation generated in {output_path}")
    return context
|
|
1754
|
+
|
|
1755
|
+
|
|
1756
|
+
if __name__ == "__main__":
    # Manual smoke test for the mode handler.
    import sys

    valid_modes = ("auto", "interactive", "hybrid")

    if len(sys.argv) < 3 or sys.argv[1].lower() not in valid_modes:
        print("Usage: python mode_handler.py <mode> <repository_path> [context_files...]")
        print("Modes: auto, interactive, hybrid")
        sys.exit(1)

    mode = sys.argv[1].lower()
    repo_path = sys.argv[2]
    context_files = sys.argv[3:] or None

    # run_documentation_mode() has no `mode` parameter; passing one raised
    # TypeError before anything ran. The only mode switch it exposes is
    # `interactive`, so "auto" and "hybrid" both run non-interactively here
    # and differ only in whether context files were supplied.
    asyncio.run(run_documentation_mode(
        repository_path=repo_path,
        interactive=(mode == "interactive"),
        context_files=context_files,
    ))
|