code-finder 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. code_finder-0.1.0/PKG-INFO +823 -0
  2. code_finder-0.1.0/README.md +798 -0
  3. code_finder-0.1.0/pyproject.toml +51 -0
  4. code_finder-0.1.0/setup.cfg +4 -0
  5. code_finder-0.1.0/src/claude_context/__init__.py +33 -0
  6. code_finder-0.1.0/src/claude_context/agentic_integration.py +309 -0
  7. code_finder-0.1.0/src/claude_context/ast_chunker.py +646 -0
  8. code_finder-0.1.0/src/claude_context/config.py +239 -0
  9. code_finder-0.1.0/src/claude_context/context_manager.py +627 -0
  10. code_finder-0.1.0/src/claude_context/embeddings.py +307 -0
  11. code_finder-0.1.0/src/claude_context/embeddings_interface.py +226 -0
  12. code_finder-0.1.0/src/claude_context/enhanced_ast_chunker.py +1129 -0
  13. code_finder-0.1.0/src/claude_context/explorer.py +951 -0
  14. code_finder-0.1.0/src/claude_context/explorer_with_context.py +1008 -0
  15. code_finder-0.1.0/src/claude_context/indexer.py +893 -0
  16. code_finder-0.1.0/src/claude_context/markdown_chunker.py +421 -0
  17. code_finder-0.1.0/src/claude_context/mode_handler.py +1774 -0
  18. code_finder-0.1.0/src/claude_context/query_metrics.py +164 -0
  19. code_finder-0.1.0/src/claude_context/question_generator.py +800 -0
  20. code_finder-0.1.0/src/claude_context/readme_extractor.py +485 -0
  21. code_finder-0.1.0/src/claude_context/repository_adapter.py +399 -0
  22. code_finder-0.1.0/src/claude_context/search.py +493 -0
  23. code_finder-0.1.0/src/claude_context/skills/__init__.py +11 -0
  24. code_finder-0.1.0/src/claude_context/skills/_cli_common.py +74 -0
  25. code_finder-0.1.0/src/claude_context/skills/_index_manager.py +98 -0
  26. code_finder-0.1.0/src/claude_context/skills/api_surface.py +219 -0
  27. code_finder-0.1.0/src/claude_context/skills/evidence_retrieval.py +151 -0
  28. code_finder-0.1.0/src/claude_context/skills/grounded_review.py +212 -0
  29. code_finder-0.1.0/src/claude_context/synthesis/__init__.py +8 -0
  30. code_finder-0.1.0/src/claude_context/synthesis/editor_agent.py +391 -0
  31. code_finder-0.1.0/src/claude_context/synthesis/llm_synthesizer.py +153 -0
  32. code_finder-0.1.0/src/claude_context/synthesis/logic_explainer.py +235 -0
  33. code_finder-0.1.0/src/claude_context/synthesis/multi_review_pipeline.py +717 -0
  34. code_finder-0.1.0/src/claude_context/synthesis/prompt_builder.py +439 -0
  35. code_finder-0.1.0/src/claude_context/synthesis/providers.py +115 -0
  36. code_finder-0.1.0/src/claude_context/synthesis/validators.py +458 -0
  37. code_finder-0.1.0/src/code_finder.egg-info/PKG-INFO +823 -0
  38. code_finder-0.1.0/src/code_finder.egg-info/SOURCES.txt +42 -0
  39. code_finder-0.1.0/src/code_finder.egg-info/dependency_links.txt +1 -0
  40. code_finder-0.1.0/src/code_finder.egg-info/entry_points.txt +4 -0
  41. code_finder-0.1.0/src/code_finder.egg-info/requires.txt +18 -0
  42. code_finder-0.1.0/src/code_finder.egg-info/top_level.txt +1 -0
  43. code_finder-0.1.0/tests/test_all_components.py +378 -0
  44. code_finder-0.1.0/tests/test_docstring_indexer.py +41 -0
@@ -0,0 +1,823 @@
1
+ Metadata-Version: 2.4
2
+ Name: code-finder
3
+ Version: 0.1.0
4
+ Summary: Code evidence retrieval and grounded review for documentation workflows. AST chunking, hybrid search (BM25 + vector), and API surface extraction.
5
+ License-Expression: Apache-2.0
6
+ Keywords: documentation,code analysis,code evidence,semantic search,ast,embeddings
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pymilvus>=2.3.0
10
+ Requires-Dist: milvus-lite>=2.3.0
11
+ Requires-Dist: sentence-transformers>=2.2.0
12
+ Requires-Dist: rank-bm25
13
+ Requires-Dist: numpy>=1.24.0
14
+ Requires-Dist: tqdm>=4.65.0
15
+ Requires-Dist: tree-sitter
16
+ Requires-Dist: tree-sitter-python
17
+ Requires-Dist: tree-sitter-javascript
18
+ Requires-Dist: tree-sitter-typescript
19
+ Requires-Dist: tree-sitter-go
20
+ Provides-Extra: synthesis
21
+ Requires-Dist: anthropic>=0.34.0; extra == "synthesis"
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: ruff; extra == "dev"
25
+
26
+ # Vibe2Doc
27
+
28
+ AI-powered documentation generation system that creates grounded, evidence-based documentation from your codebase using semantic search and LLM synthesis.
29
+
30
+ Vibe2Doc is a command-line tool that generates comprehensive documentation directly from your terminal. For higher quality results, choose between:
31
+ - **Multi-Review Pipeline** (`--multi-review`): Fully automated writer + 3 specialist reviewers (91% accuracy, ~30 min)
32
+ - **Two-Pass Workflow** (`--review`): Manual IDE refinement with Cursor/Claude Code for final polish
33
+
34
+ ## 🚀 Key Features
35
+
36
+ - **Evidence-Driven Documentation**: Prioritizes README essentials (install, quickstart) before code internals
37
+ - **Multi-README Support**: Automatically discovers READMEs throughout your repository (root + subdirectories)
38
+ - **LLM-Powered**: Uses Claude Sonnet 4 to synthesize complete documentation
39
+ - **Mandatory Validation**: Fails if essential content (installation, quickstart) is missing
40
+ - **Two Modes**: Automatic (fast) or Interactive (with Q&A for gaps)
41
+ - **Quality Enhancement Options**:
42
+ - **Multi-Review Pipeline** (`--multi-review`): Automated writer + 3 specialist reviewers with fresh-eyes isolation and regression checks
43
+ - **Two-Pass Workflow** (`--review`): Manual IDE refinement for final polish with full codebase context
44
+ - **Semantic Search**: AST-based code chunking with hybrid BM25 + vector search
45
+ - **Context Integration**: Include requirements, design docs, and other context files
46
+
47
+ ## 📋 Quick Start
48
+
49
+ ### Installation
50
+
51
+ ```bash
52
+ # Clone the repository
53
+ git clone https://github.com/yourusername/vibe2doc.git
54
+ cd vibe2doc
55
+
56
+ # Create and activate virtual environment
57
+ python -m venv venv
58
+ source venv/bin/activate # On Windows: venv\Scripts\activate
59
+
60
+ # Install dependencies
61
+ pip install -r requirements-claude-context.txt
62
+
63
+ # Set up environment variables (for LLM synthesis)
64
+ cp .env.example .env
65
+ # Edit .env and add your API keys:
66
+ # ANTHROPIC_API_KEY=your_key_here
67
+ # OPENAI_API_KEY=your_key_here (optional)
68
+ ```
69
+
70
+ ### Clone the Repository You Want to Document
71
+
72
+ Before generating documentation, you need to clone the GitHub project you want to document:
73
+
74
+ 1. Navigate to the GitHub repository you want to document
75
+ 2. Clone it to your local machine:
76
+
77
+ ```bash
78
+ # Option 1: Clone into the same parent directory as vibe2doc
79
+ cd /path/to/parent-directory
80
+ git clone https://github.com/owner/target-repo.git
81
+
82
+ # Option 2: Clone to any location
83
+ git clone https://github.com/owner/target-repo.git /path/to/your/preferred/location
84
+ ```
85
+
86
+ **Note**: When running `generate_docs.py`, you'll provide the path to this cloned repository. If you cloned it next to vibe2doc, you can use a relative path like `../target-repo`. Otherwise, use the full absolute path.
87
+
88
+ ### Basic Usage
89
+
90
+ ```bash
91
+ # Automatic mode (default) - Fast, complete documentation
92
+ python generate_docs.py /path/to/repo --output ./docs
93
+
94
+ # With context files (recommended for better docs)
95
+ python generate_docs.py /path/to/repo \
96
+ --context requirements.md design.md \
97
+ --output ./docs
98
+
99
+ # Interactive mode - Ask targeted questions to fill gaps
100
+ python generate_docs.py /path/to/repo \
101
+ --interactive \
102
+ --context requirements.md \
103
+ --output ./docs
104
+
105
+ # User-focused (quickstart-first) vs technical (architecture-first)
106
+ python generate_docs.py /path/to/repo \
107
+ --user-focused \
108
+ --output ./docs
109
+
110
+ # With editor review (manual IDE refinement) - LLM critiques and refines each section
111
+ python generate_docs.py /path/to/repo \
112
+ --review \
113
+ --output ./docs
114
+
115
+ # With multi-review pipeline (automated) - Writer + 3 specialist reviewers
116
+ python generate_docs.py /path/to/repo \
117
+ --multi-review \
118
+ --output ./docs
119
+
120
+ # With custom synthesis template
121
+ python generate_docs.py /path/to/repo \
122
+ --synthesis-template ./templates/synthesis_universal.yaml \
123
+ --output ./docs
124
+
125
+ # Use only root README (legacy behavior, skips subdirectory READMEs)
126
+ python generate_docs.py /path/to/repo \
127
+ --single-readme \
128
+ --output ./docs
129
+ ```
130
+
131
+ ### Output
132
+
133
+ Vibe2Doc generates comprehensive documentation:
134
+
135
+ - `SYNTHESIZED_GETTING_STARTED.md` - Installation, quick start, and first-run guidance
136
+ - `SYNTHESIZED_TERMINOLOGY.md` - Glossary of domain-specific terms and acronyms
137
+ - `SYNTHESIZED_CONCEPTS.md` - How the system works, architecture, mental models
138
+ - `SYNTHESIZED_SCHEMES_AND_PATTERNS.md` - High-level workflows, recipes, and decision points
139
+ - `SYNTHESIZED_HOW_TO_GUIDES.md` - Scenario-driven how-to guides with step-by-step commands
140
+ - `SYNTHESIZED_API_REFERENCE.md` - Core classes, functions, and CLI commands
141
+ - `SYNTHESIZED_TROUBLESHOOTING.md` - Common issues, error messages, and solutions
142
+ - `SYNTHESIZED_SOURCES.md` - Citations and evidence transparency
143
+ - `SYNTHESIZED_TUTORIALS.md` - Learning-oriented walkthroughs (if enabled)
144
+ - `SYNTHESIZED_DEVELOPER_GUIDE.md` - Contributor setup and guidelines (if enabled)
145
+ - `REQUIREMENTS_TRACING.md` - Requirements mapping (if context files provided)
146
+ - `editor/` - Editor-reviewed revisions (when using `--review` flag)
147
+
148
+ **Editor Review Pass** (with `--review` flag):
149
+ After initial synthesis, an LLM-powered editor agent critiques each section, identifies factual errors or missing rationale, and proposes refined versions. The editor uses the same hybrid search infrastructure to ground all revisions in repository evidence. Refined drafts are saved to the `editor/` subdirectory.
150
+
151
+ **Note**: All files are LLM-synthesized with evidence-based validation
152
+
153
+ ## 📖 Documentation Modes
154
+
155
+ ### Two-Pass Workflow (with `--review` flag)
156
+ Combine automatic synthesis with an editor agent that critiques the draft, then refine the result using your IDE's AI assistant (Cursor Composer or Claude Code), which has full codebase context:
157
+
158
+ 1. **Auto Synthesis** – Run `generate_docs.py` to collect evidence and produce `SYNTHESIZED_*.md` files with citations.
159
+
160
+ 2. **Editor Review** – Add the `--review` flag to invoke the editor agent:
161
+ ```bash
162
+ python generate_docs.py /path/to/repo --review --output ./docs
163
+ ```
164
+ The editor reviews each section, identifies issues (factual errors, missing rationale, style gaps), and generates a single combined prompt covering all sections.
165
+
166
+ 3. **IDE Refinement** – Use the generated prompt with your IDE's AI assistant:
167
+ - The combined prompt is automatically copied to your clipboard
168
+ - Open your project in Cursor or VS Code with Claude Code
169
+ - Paste the prompt (Cmd+V on macOS, Ctrl+V on Windows/Linux) into Composer or Claude Code chat
170
+ - The AI will refine all documentation files using its full understanding of your codebase
171
+
172
+ 4. **Continue the Conversation** – After the initial fixes, you can continue chatting with your IDE's AI to:
173
+ - Ask questions: "Can you explain the authentication flow in more detail?"
174
+ - Request enhancements: "Add a troubleshooting section for common errors"
175
+ - Fill gaps: "The API reference is missing the batch processing endpoint"
176
+
177
+ 5. **Publishing** – Use `scripts/prepare_docs.py` to copy the refined output into `docs/generated/` for MkDocs preview.
178
+
179
+ **Why not just use Cursor/Claude Code directly?** You could ask your IDE to write documentation from scratch, but you'd get generic, surface-level content. Vibe2Doc provides critical value that the IDE refinement builds upon:
180
+
181
+ | What Vibe2Doc Provides | Why It Matters |
182
+ |------------------------|----------------|
183
+ | **Structured evidence collection** | Systematically extracts installation commands, quickstart examples, and API patterns - ensuring nothing essential is missed |
184
+ | **Rationale extraction** | LogicExplainer captures the *why* behind code decisions - trade-offs, side effects, design intent that aren't obvious from code alone |
185
+ | **Validated first draft** | The synthesized docs are already grounded in evidence with citations, giving the IDE a solid foundation to refine rather than invent |
186
+ | **Targeted review prompts** | EditorAgent identifies specific issues (not vague suggestions), so the IDE knows exactly what to fix |
187
+
188
+ **The result**: Your IDE's AI refines *evidence-based documentation* rather than hallucinating from scratch. It fills gaps and improves style while the factual foundation remains grounded in your actual codebase.
189
+
190
+ Use this mode when you want the highest quality documentation with accurate, context-aware refinements.
191
+
192
+ ### Multi-Review Refinement (Two-Step Workflow) ⭐ RECOMMENDED
193
+
194
+ **The most reliable workflow**: Generate high-quality docs with `--review`, then refine in Claude Code with `/multi-review` skill.
195
+
196
+ #### Step 1: Generate Base Documentation (Fast & Reliable)
197
+
198
+ ```bash
199
+ python3 generate_docs.py /path/to/repo --review --output ./docs
200
+ ```
201
+
202
+ **Runtime**: 15-20 minutes
203
+ **Accuracy**: 85-97% (excellent quality, ready to publish)
204
+ **No timeouts**: Stable, proven workflow
205
+
206
+ #### Step 2: Multi-Review Refinement in Claude Code (Optional Polish)
207
+
208
+ If you want to push accuracy to 95-99% and remove boilerplate repetition:
209
+
210
+ ```bash
211
+ # In Claude Code chat or terminal
212
+ /multi-review docs
213
+ ```
214
+
215
+ **What the `/multi-review` skill does**:
216
+
217
+ 1. **Reviewer A (Technical Accuracy)** – Batch-reviews 5 docs at a time
218
+ - Verifies code examples, function signatures, file paths
219
+ - Checks import statements and API references
220
+ - Validates citations point to real files
221
+
222
+ 2. **Reviewer B (Boilerplate Removal)** ⭐ **PRIMARY FOCUS**
223
+ - **Removes duplicate Installation/Quickstart/Citation blocks**
224
+ - Ensures ONLY `SYNTHESIZED_GETTING_STARTED.md` has full setup instructions
225
+ - Replaces duplicate content with cross-references:
226
+ `"For installation, see [Getting Started](../SYNTHESIZED_GETTING_STARTED)"`
227
+ - Checks learning flow (beginner → advanced)
228
+
229
+ 3. **Reviewer C (Consistency + Safety Check)**
230
+ - Verifies naming consistency, citation format, cross-reference links
231
+ - Final check for any remaining boilerplate duplication
232
+ - Catches regressions introduced by previous reviewers
233
+
234
+ **Runtime**: 15-25 minutes (batched to avoid timeouts)
235
+ **Output**: Refined docs + `REVIEW_LOG.json` audit trail
236
+ **Accuracy improvement**: 85-97% → 95-99%
237
+
238
+ #### Why Two Steps Are Better Than the Automated Pipeline
239
+
240
+ | Aspect | Automated `--multi-review` | Two-Step Workflow |
241
+ |--------|---------------------------|-------------------|
242
+ | **Base generation** | ❌ Timeouts on large repos | ✅ Always completes (15-20 min) |
243
+ | **Refinement** | ❌ Timeouts, expensive | ✅ Claude Code handles it well |
244
+ | **Cost** | $$$ (many long API calls) | $ (base) + $$ (refinement when needed) |
245
+ | **Control** | ❌ Black box, can't intervene | ✅ Monitor, stop, resume |
246
+ | **Failure recovery** | ❌ Start over | ✅ Continue from where it stopped |
247
+ | **Repo context** | ⚠️ Via searcher | ✅ Direct Read/Edit/Write tools |
248
+
249
+ #### When to Use Multi-Review Refinement
250
+
251
+ ✅ **Use `/multi-review` skill when**:
252
+ - You see duplicate quickstart/installation sections across docs
253
+ - You want 95-99% accuracy (vs 85-97% from base generation)
254
+ - You need an audit trail of review decisions (`REVIEW_LOG.json`)
255
+ - You're documenting workflows, architecture, tutorials
256
+
257
+ ❌ **Skip refinement when**:
258
+ - Base docs are already 95%+ accurate (run validation to check)
259
+ - You're generating API reference (use `mkdocstrings` instead)
260
+ - Time is critical (base docs are already high quality)
261
+
262
+ #### Example Workflow
263
+
264
+ ```bash
265
+ # 1. Generate base docs (fast, reliable)
266
+ python3 generate_docs.py speculators --review --output ./docs
267
+
268
+ # 2. Validate accuracy
269
+ python3 validate_documentation_accuracy.py docs/ speculators/
270
+ # Result: 96.8% accuracy ✅
271
+
272
+ # 3. (Optional) If you want to remove boilerplate and push to 98%+
273
+ # In Claude Code:
274
+ /multi-review docs
275
+
276
+ # 4. Validate again
277
+ python3 validate_documentation_accuracy.py docs/ speculators/
278
+ # Result: 98.5% accuracy ✅
279
+ ```
280
+
281
+ **Result**: High-quality documentation with no timeouts, full control, and optional refinement when needed.
282
+
283
+ **Comparison with `--review` flag**:
284
+
285
+ | Aspect | `--review` (Manual) | `--multi-review` (Automated) |
286
+ |--------|---------------------|------------------------------|
287
+ | **Interaction** | Manual IDE refinement | Fully automated |
288
+ | **Time** | 15-45 min (depends on IDE work) | ~30 min (automatic) |
289
+ | **Quality** | Highest (human + AI context) | Very good (91% accuracy) |
290
+ | **Best for** | Final polish, gap filling | Conceptual docs, tutorials |
291
+ | **Audit trail** | IDE chat history | REVIEW_LOG.json |
292
+ | **Repeatability** | Manual steps | Fully reproducible |
293
+
294
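+ **If you prefer the automated pipeline**: Use `--multi-review` for automated baseline quality, then optionally use `--review` for final human-guided refinement if needed. Note that automated `--multi-review` can time out on large repositories, which is why the two-step workflow (`--review` followed by the `/multi-review` skill) is the recommended default.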
295
+
296
+ ### Automatic Mode (Default)
297
+ Fast, automatic documentation generation from code and README.
298
+ - **Use when**: You want complete documentation quickly
299
+ - **Process**:
300
+ 1. Extracts essentials from README (install, quickstart, auth)
301
+ 2. Analyzes code (architecture, API, patterns)
302
+ 3. Synthesizes complete documentation with LLM
303
+ - **Output**: Complete docs with installation, quickstart, architecture, API
304
+ - **Time**: 5-10 minutes
305
+
306
+ ### Interactive Mode
307
+ Guided documentation with targeted Q&A to fill gaps.
308
+ - **Use when**: You want to add design insights and context
309
+ - **Process**:
310
+ 1. Auto-extracts what it can (same as Automatic)
311
+ 2. Shows coverage: what's documented vs what's missing
312
+ 3. Asks targeted questions to fill gaps
313
+ 4. Generates complete documentation
314
+ - **Output**: Complete docs + human insights
315
+ - **Time**: 15-30 minutes
316
+
317
+ ## 🎯 Validating Documentation Accuracy
318
+
319
+ Vibe2Doc includes an **automated accuracy validation tool** that objectively tests generated documentation against your codebase.
320
+
321
+ ### Quick Validation
322
+
323
+ ```bash
324
+ # Validate documentation accuracy
325
+ python3 validate_documentation_accuracy.py docs/ /path/to/repo/
326
+
327
+ # With detailed JSON report
328
+ python3 validate_documentation_accuracy.py \
329
+ docs/ \
330
+ /path/to/repo/ \
331
+ --output accuracy_report.json
332
+ ```
333
+
334
+ ### What It Tests
335
+
336
+ The validator automatically checks **7 categories** of accuracy:
337
+
338
+ | Category | Tests | Example |
339
+ |----------|-------|---------|
340
+ | **Installation Commands** | `pip install` syntax validity | ✅ `pip install speculators` |
341
+ | **File Paths** | Referenced files exist in repo | ✅ `scripts/data_generation.py` exists |
342
+ | **Import Statements** | Modules/functions are real | ✅ `from pkg.module import func` works |
343
+ | **API Signatures** | Functions mentioned exist | ✅ `load_model_layers()` found in source |
344
+ | **Citations** | `[CITE:file]` points to real files | ✅ All citation targets exist |
345
+ | **Code Examples** | Python syntax is valid | ✅ Code blocks parse correctly |
346
+ | **Command Syntax** | No typos in bash commands | ✅ Commands have correct syntax |
347
+
348
+ ### Sample Output
349
+
350
+ ```
351
+ ================================================================================
352
+ 📊 DOCUMENTATION ACCURACY REPORT
353
+ ================================================================================
354
+
355
+ Overall Accuracy: 92.8%
356
+ Total Tests: 209
357
+ ✅ Passed: 194
358
+ ❌ Failed: 15
359
+
360
+ By Category:
361
+ --------------------------------------------------------------------------------
362
+ citations 122/122 (100.0%) ✅
363
+ code_examples 6/ 6 (100.0%) ✅
364
+ commands 45/ 45 (100.0%) ✅
365
+ file_paths 11/ 19 ( 57.9%) ❌
366
+ imports 2/ 9 ( 22.2%) ❌
367
+ installation 8/ 8 (100.0%) ✅
368
+ ```
369
+
370
+ ### Accuracy Grading
371
+
372
+ | Score | Grade | Action |
373
+ |-------|-------|--------|
374
+ | 95-100% | A+ | ✅ Production ready |
375
+ | 90-94% | A | ✅ Publish (multi-review achieves this) |
376
+ | 85-89% | B+ | ⚠️ Review failures, fix critical |
377
+ | 80-84% | B | ⚠️ Fix critical/major issues |
378
+ | <80% | C-F | ❌ Fix before publishing |
379
+
380
+ **Multi-review pipeline achievement**: 92.8% accuracy (A grade)
381
+
382
+ ### CI/CD Integration
383
+
384
+ The validator exits with code 0 if accuracy ≥ 80%, making it perfect for CI/CD:
385
+
386
+ ```bash
387
+ # Quality gate in your workflow
388
+ python3 generate_docs.py repo/ --multi-review --output docs/ && \
389
+ python3 validate_documentation_accuracy.py docs/ repo/
390
+
391
+ if [ $? -eq 0 ]; then
392
+ echo "✅ Documentation validated - ready to publish"
393
+ else
394
+ echo "❌ Accuracy below threshold - review errors"
395
+ exit 1
396
+ fi
397
+ ```
398
+
399
+ For detailed guidance, see [ACCURACY_VALIDATION_GUIDE.md](ACCURACY_VALIDATION_GUIDE.md).
400
+
401
+ ## 📚 Multi-README Support
402
+
403
+ Vibe2Doc automatically discovers and incorporates README files from throughout your repository:
404
+
405
+ ### How It Works
406
+
407
+ - **Root README** (highest priority): Used for installation, quickstart, and authentication essentials
408
+ - **Module READMEs**: Provide context for specific subsystems, incorporated into How-To Guides
409
+ - **Automatic discovery**: No need to manually specify context files for READMEs
410
+ - **Smart filtering**: Ignores READMEs in `node_modules/`, `venv/`, `build/`, `.git/`, etc.
411
+
412
+ ### Example Repository Structure
413
+
414
+ ```
415
+ myproject/
416
+ ├── README.md # Main project overview, installation (→ Getting Started)
417
+ ├── src/
418
+ │ ├── api/
419
+ │ │ └── README.md # API-specific examples, endpoints (→ How-To Guides)
420
+ │ └── cli/
421
+ │ └── README.md # CLI command reference (→ How-To Guides)
422
+ ├── examples/
423
+ │ └── README.md # Usage examples (→ How-To Guides)
424
+ └── .v2dignore # Optional: exclude specific paths
425
+ ```
426
+
427
+ ### Limits and Safeguards
428
+
429
+ | Limit | Value | Purpose |
430
+ |-------|-------|---------|
431
+ | Max depth | 4 levels | Avoid deeply nested READMEs |
432
+ | Max READMEs | 15 | Prevent context explosion |
433
+ | Max per-file | 10K chars | Truncate oversized READMEs |
434
+ | Max total | 100K chars | Protect token budget |
435
+ | Min size | 200 chars | Skip stubs |
436
+
437
+ ### Custom Ignore Patterns (.v2dignore)
438
+
439
+ Create a `.v2dignore` file in your repository root to exclude specific READMEs:
440
+
441
+ ```bash
442
+ # .v2dignore - gitignore-style patterns
443
+ examples/legacy/ # Ignore legacy examples directory
444
+ vendor/ # Ignore vendored dependencies
445
+ **/internal/* # Ignore internal module READMEs
446
+ ```
447
+
448
+ ### Legacy Mode
449
+
450
+ Use `--single-readme` to revert to the previous behavior (only root README):
451
+
452
+ ```bash
453
+ python generate_docs.py /path/to/repo --single-readme --output ./docs
454
+ ```
455
+
456
+ ## ⚙️ Configuration
457
+
458
+ ### Environment Variables
459
+ ```bash
460
+ # REQUIRED: API key for LLM synthesis (documentation generation requires this)
461
+ ANTHROPIC_API_KEY=sk-ant-... # Claude Sonnet 4
462
+
463
+ # Optional configuration
464
+ VIBE2DOC_MODEL=claude-sonnet-4-5@20250929
465
+ VIBE2DOC_TEMPERATURE=0.2
466
+ VIBE2DOC_MAX_TOKENS=1200
467
+ ```
468
+
469
+ ### Synthesis Templates
470
+
471
+ Vibe2Doc uses `synthesis_universal.yaml` by default, which provides comprehensive documentation structure based on the [Diátaxis framework](https://diataxis.fr/). You can customize synthesis behavior by creating your own YAML template:
472
+
473
+ ```bash
474
+ # Use the default universal template
475
+ python generate_docs.py /path/to/repo --output ./docs
476
+
477
+ # Use a custom template
478
+ python generate_docs.py /path/to/repo \
479
+ --synthesis-template ./templates/my_custom_template.yaml \
480
+ --output ./docs
481
+ ```
482
+
483
+ #### Documentation Content Types
484
+
485
+ The template generates different types of documentation, each serving a distinct purpose:
486
+
487
+ | Content Type | Purpose | User Question Answered |
488
+ |--------------|---------|------------------------|
489
+ | **Getting Started** | Quick path to first success | "How do I install and run this?" |
490
+ | **Terminology** | Glossary of domain terms | "What does X mean?" |
491
+ | **Concepts** | Understanding how it works | "How does this system work?" |
492
+ | **Schemes & Patterns** | Decision guidance | "Which approach should I use?" |
493
+ | **How-To Guides** | Task completion | "How do I accomplish X?" |
494
+ | **API Reference** | Complete, accurate details | "What are the exact parameters?" |
495
+ | **Troubleshooting** | Problem resolution | "Why isn't this working?" |
496
+ | **Tutorials** *(optional)* | Guided learning | "Teach me how to use this" |
497
+ | **Developer Guide** *(optional)* | Contribution guidance | "How do I contribute?" |
498
+
499
+ #### Core Sections (Always Generated)
500
+
501
+ These sections are generated by default for every project:
502
+
503
+ - **Getting Started** (2500 words) - Installation, prerequisites, quickstart example, navigation hints
504
+ - **Terminology** (800 words) - Glossary of domain-specific terms and acronyms
505
+ - **Concepts** (2500 words) - How the system works, architecture overview, mental models
506
+ - **Schemes and Patterns** (2500 words) - Workflows, decision guides, compatibility tables
507
+ - **How-To Guides** (3000 words) - Step-by-step task completion guides
508
+ - **API Reference** (3500 words) - Classes, functions, CLI commands with parameter tables
509
+ - **Troubleshooting** (1500 words) - Common issues, error messages, solutions
510
+ - **Sources** (400 words) - Citations and evidence transparency
511
+
512
+ #### Optional Sections
513
+
514
+ These sections are disabled by default but can be enabled for projects that need them.
515
+
516
+ ##### Tutorials (Learning-Oriented)
517
+
518
+ **When to enable**: Projects with significant learning curves where users need guided exploration.
519
+
520
+ **Tutorial vs How-To Guide**:
521
+ - **Tutorial**: "Learn speculative decoding by building a simple speculator" - teaches concepts through doing, explains *why* at each step
522
+ - **How-To**: "Deploy a speculator to vLLM" - assumes understanding, focuses on task completion
523
+
524
+ **Enable when**:
525
+ - Project has complex concepts that benefit from step-by-step learning
526
+ - There are example projects, notebooks, or demos in the codebase
527
+ - README mentions "tutorial", "learn", or "getting started guide"
528
+ - New users frequently struggle with the same conceptual hurdles
529
+
530
+ **How to enable**: Edit `synthesis_universal.yaml`:
531
+ ```yaml
532
+ - name: Tutorials
533
+ enabled: true # Change from false to true
534
+ order: 9
535
+ max_words: 3000
536
+ ```
537
+
538
+ ##### Developer Guide (Contributor-Oriented)
539
+
540
+ **When to enable**: Open-source projects that accept external contributions.
541
+
542
+ **Enable when**:
543
+ - Project has a CONTRIBUTING.md or contribution guidelines
544
+ - There's a test suite, CI/CD pipeline, or code quality tools
545
+ - You want to onboard new contributors efficiently
546
+ - The development setup differs from user installation
547
+
548
+ **What it covers**:
549
+ - Development environment setup (clone, install dev dependencies)
550
+ - Running tests (unit, integration, e2e)
551
+ - Code style and quality requirements (linting, formatting, pre-commit hooks)
552
+ - Pull request process and CI/CD checks
553
+
554
+ **How to enable**: Edit `synthesis_universal.yaml`:
555
+ ```yaml
556
+ - name: Developer Guide
557
+ enabled: true # Change from false to true
558
+ order: 10
559
+ max_words: 2000
560
+ ```
561
+
562
+ #### Word Count Guidelines
563
+
564
+ Word counts are tuned for typical projects but can be adjusted:
565
+
566
+ | Section | Default | Increase If... |
567
+ |---------|---------|----------------|
568
+ | API Reference | 3500 | Large API surface (50+ endpoints) |
569
+ | How-To Guides | 3000 | Many distinct use cases (5+) |
570
+ | Concepts | 2500 | Complex architecture |
571
+ | Tutorials | 3000 | Multi-step learning paths |
572
+
573
+ **Customizing Templates**: See [templates/README.md](templates/README.md) for detailed guidance on creating and modifying synthesis templates, including section structure, word limits, citation requirements, and validation rules.
574
+
575
+ ## 🏗️ Architecture
576
+
577
+ Vibe2Doc combines semantic code search with LLM-powered agents that work together to generate accurate, evidence-based documentation.
578
+
579
+ ### Agent Modules
580
+
581
+ | Agent | Purpose |
582
+ |-------|---------|
583
+ | **LogicExplainer** | Analyzes code units to extract design rationale, trade-offs, and side effects. Provides the "why" behind code decisions. |
584
+ | **LLMSynthesizer** | Orchestrates documentation generation using templates. Builds prompts with evidence, generates sections, and validates output against rules. |
585
+ | **EditorAgent** | Reviews synthesized documentation against repository evidence. Identifies factual errors, missing rationale, and generates improvement prompts for IDE refinement. |
586
+
587
+ ### How the Agents Work Together
588
+
589
+ ```mermaid
590
+ flowchart TD
591
+ subgraph Input
592
+ A[Repository] --> B[Indexer]
593
+ A --> C[README Extractor]
594
+ end
595
+
596
+ subgraph Evidence Collection
597
+ B --> D[Hybrid Search<br/>BM25 + Vector]
598
+ C --> E[Essentials<br/>Install/Quickstart]
599
+ D --> F[Code Patterns]
600
+ F --> G[LogicExplainer]
601
+ G --> H[Rationale Evidence]
602
+ end
603
+
604
+ subgraph Synthesis
605
+ E --> I[LLMSynthesizer]
606
+ F --> I
607
+ H --> I
608
+ I --> J[SYNTHESIZED_*.md<br/>Draft Documentation]
609
+ end
610
+
611
+ subgraph Review & Refinement
612
+ J --> K[EditorAgent]
613
+ H --> K
614
+ K --> L{Issues Found?}
615
+ L -->|Yes| M[Generate IDE Prompt]
616
+ L -->|No| N[Final Documentation]
617
+ M --> O[Cursor / Claude Code]
618
+ O --> N
619
+ end
620
+
621
+ style A fill:#e1f5fe
622
+ style N fill:#c8e6c9
623
+ style O fill:#fff3e0
624
+ ```
625
+
626
+ ### Core Infrastructure
627
+
628
+ - **AST-based Chunking**: Semantic parsing of code into meaningful units
629
+ - **Hybrid Search**: BM25 (keyword) + vector (semantic) search
630
+ - **Evidence-Based Generation**: All claims grounded in actual code
631
+ - **Validation Layer**: Ensures citations, word limits, and required elements
632
+ - **Fail-Fast Design**: Clear errors when evidence is insufficient
633
+
634
+ See [Architecture Documentation](docs/architecture/MODE_ARCHITECTURE_COMPARISON.md) for details.
635
+
636
+ ## 💡 Why Vibe2Doc is Different
637
+
638
+ Vibe2Doc prioritizes **completeness and accuracy** over generic descriptions:
639
+
640
+ - **Evidence-First**: Extracts installation/quickstart from README BEFORE analyzing code internals
641
+ - **Mandatory Validation**: FAILS if essential content (install, quickstart) is missing from output
642
+ - **No Paraphrasing**: Shows exact commands and code examples ("pip install X", not "can be installed")
643
+ - **Structured**: Follows a strict priority order: Essentials → Usage → Architecture → Internals
644
+ - **Auditable**: Every claim cites a specific source (README or code location)
645
+
646
+ **Key Design Principle**: Users need to know HOW TO USE software before understanding how it works internally.
647
+
648
+ Vibe2Doc ensures documentation includes:
649
+ 1. Installation commands (MANDATORY if in README)
650
+ 2. Quickstart code example (MANDATORY if in README)
651
+ 3. Authentication setup (if applicable)
652
+ 4. THEN architecture and internals
653
+
654
+ ## 🔧 Troubleshooting
655
+
656
+ ### Common Issues
657
+
658
+ **API key errors**
659
+ - Create a `.env` file with `ANTHROPIC_API_KEY=your_key` or `OPENAI_API_KEY=your_key`
660
+ - An API key is REQUIRED (LLM synthesis is mandatory for documentation)
661
+ - Check that the API key has sufficient credits/quota
662
+
663
+ **Missing installation or quickstart in output**
664
+ - This should NOT happen - validation will fail if essentials are missing
665
+ - If validation passed but content is missing, file a bug
666
+ - Check that README actually contains installation/quickstart sections
667
+
668
+ **Verbose logging**
669
+ - Use `--verbose` flag for detailed logs
670
+ - Use `python -m pdb generate_docs.py` for debugging
671
+
672
+ **Import errors**
673
+ - Ensure your virtual environment is activated
674
+ - Run: `pip install -r requirements-claude-context.txt`
675
+ - Python 3.8+ required
676
+
677
+ **Memory/performance issues**
678
+ - Use `.gitignore` to exclude large directories
679
+ - Limit repository scope to specific subdirectories
680
+ - Reduce chunk size in configuration
681
+
682
+ ## 📚 Publishing Documentation
683
+
684
+ This section walks through the complete flow from generation to publishing on GitLab/GitHub Pages.
685
+
686
+ ### Complete Workflow: From Generation to Publishing
687
+
688
+ ```
689
+ ┌─────────────────────────────────────────────────────────────────────────────┐
690
+ │ PROJECTS/ │
691
+ │ ├── vibe2doc_withAgents/ ← Main repo │
692
+ │ │ ├── docs_speculators/ ← Step 1: Generated docs │
693
+ │ │ ├── docs/generated/speculators/ ← Step 2: Staged (gitignored) │
694
+ │ │ └── scripts/ │
695
+ │ │ ├── prepare_docs.py ← Copies to docs/generated/ │
696
+ │ │ └── publish_docs.py ← Copies to ../docs-output/ │
697
+ │ │ │
698
+ │ └── docs-output/ ← Step 4: External GitLab Pages repo │
699
+ │ ├── mkdocs.yml ← Auto-generated │
700
+ │ ├── .gitlab-ci.yml ← Auto-generated │
701
+ │ └── docs/speculators/ ← Published docs │
702
+ └─────────────────────────────────────────────────────────────────────────────┘
703
+ ```
704
+
705
+ #### Step 1: Generate and Refine Documentation
706
+
707
+ ```bash
708
+ # Generate docs with editor review
709
+ python generate_docs.py /path/to/speculators --review --output ./docs_speculators
710
+
711
+ # Prompt is copied to clipboard - paste into Cursor/Claude Code
712
+ # Make additional refinements in the IDE conversation
713
+ ```
714
+
715
+ **Output**: `./docs_speculators/SYNTHESIZED_*.md` (refined by IDE)
716
+
717
+ #### Step 2: Stage for Local Preview
718
+
719
+ ```bash
720
+ python scripts/prepare_docs.py
721
+ ```
722
+
723
+ This copies `docs_speculators/*.md` → `docs/generated/speculators/` (gitignored)
724
+
725
+ #### Step 3: Preview Locally (Optional)
726
+
727
+ ```bash
728
+ mkdocs serve
729
+ ```
730
+
731
+ Navigate to <http://127.0.0.1:8000> to preview before publishing.
732
+
733
+ #### Step 4: Publish to External Repo
734
+
735
+ ```bash
736
+ # Publish a single project
737
+ python3 scripts/publish_docs.py --project speculators
738
+
739
+ # Or publish all available projects
740
+ python3 scripts/publish_docs.py --all
741
+
742
+ # List available projects
743
+ python3 scripts/publish_docs.py --list
744
+ ```
745
+
746
+ Publishing a project:
747
+ - Copies docs to `PROJECTS/docs-output/docs/speculators/`
748
+ - Auto-generates `mkdocs.yml` with navigation
749
+ - Creates `.gitlab-ci.yml` for GitLab Pages (first run only)
750
+
751
+ #### Step 5: Deploy to GitLab Pages
752
+
753
+ ```bash
754
+ cd ../docs-output
755
+ git add .
756
+ git commit -m "Update speculators documentation"
757
+ git push # Triggers GitLab Pages deployment
758
+ ```
759
+
760
+ ### Quick Reference: File Locations
761
+
762
+ | Stage | Location | Git Status |
763
+ |-------|----------|------------|
764
+ | Generated docs (raw) | `./docs_<project>/` | gitignored |
765
+ | Staged for preview | `./docs/generated/<project>/` | gitignored |
766
+ | Published docs | `../docs-output/docs/<project>/` | separate repo |
767
+ | Publishing config | `../docs-output/mkdocs.yml` | auto-generated |
768
+
769
+ ### Setting Up the External docs-output Repo
770
+
771
+ First-time setup for GitLab Pages:
772
+
773
+ ```bash
774
+ cd PROJECTS
775
+ mkdir docs-output
776
+ cd docs-output
777
+ git init
778
+ git remote add origin git@gitlab.com:yourusername/docs-output.git
779
+
780
+ # Run publish_docs.py to generate initial files
781
+ cd ../vibe2doc_withAgents
782
+ python scripts/publish_docs.py --project speculators
783
+
784
+ # Push to GitLab
785
+ cd ../docs-output
786
+ git add .
787
+ git commit -m "Initial documentation"
788
+ git push -u origin main
789
+ ```
790
+
791
+ GitLab Pages will automatically build and deploy the site using the generated `.gitlab-ci.yml`.
792
+
793
+ ## 🛠️ Development
794
+
795
+ ### Project Structure
796
+
797
+ ```
798
+ vibe2doc/
799
+ ├── src/ # Source code
800
+ │ ├── claude_context/ # Core documentation engine
801
+ │ │ ├── synthesis/ # LLM synthesis and editor agent
802
+ │ │ ├── indexer.py # Hybrid search indexing
803
+ │ │ ├── search.py # BM25 + vector search
804
+ │ │ ├── readme_extractor.py # README essentials extraction
805
+ │ │ └── mode_handler.py # Auto/Interactive modes
806
+ │ ├── analysis/ # Code analysis utilities
807
+ │ ├── core/ # Infrastructure (LLM providers, MCP)
808
+ │ ├── generation/ # Documentation generation
809
+ │ ├── utilities/ # Helper utilities
810
+ │ └── validation/ # Quality control
811
+ ├── templates/ # Synthesis YAML templates
812
+ ├── docs/ # Project documentation
813
+ ├── scripts/ # Helper scripts (prepare_docs.py, etc.)
814
+ └── requirements*.txt # Dependencies
815
+ ```
816
+
817
+ ## 🤝 Contributing
818
+
819
+ This project is under active development. For contribution guidelines and development setup, see the documentation in `docs/development/`.
820
+
821
+ ## 📄 License
822
+
823
+ [License information to be added]