PyPI - code-finder - Versions diffs - 0.1.0__tar.gz - Mend

code-finder 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

code_finder-0.1.0/PKG-INFO +823 -0
code_finder-0.1.0/README.md +798 -0
code_finder-0.1.0/pyproject.toml +51 -0
code_finder-0.1.0/setup.cfg +4 -0
code_finder-0.1.0/src/claude_context/__init__.py +33 -0
code_finder-0.1.0/src/claude_context/agentic_integration.py +309 -0
code_finder-0.1.0/src/claude_context/ast_chunker.py +646 -0
code_finder-0.1.0/src/claude_context/config.py +239 -0
code_finder-0.1.0/src/claude_context/context_manager.py +627 -0
code_finder-0.1.0/src/claude_context/embeddings.py +307 -0
code_finder-0.1.0/src/claude_context/embeddings_interface.py +226 -0
code_finder-0.1.0/src/claude_context/enhanced_ast_chunker.py +1129 -0
code_finder-0.1.0/src/claude_context/explorer.py +951 -0
code_finder-0.1.0/src/claude_context/explorer_with_context.py +1008 -0
code_finder-0.1.0/src/claude_context/indexer.py +893 -0
code_finder-0.1.0/src/claude_context/markdown_chunker.py +421 -0
code_finder-0.1.0/src/claude_context/mode_handler.py +1774 -0
code_finder-0.1.0/src/claude_context/query_metrics.py +164 -0
code_finder-0.1.0/src/claude_context/question_generator.py +800 -0
code_finder-0.1.0/src/claude_context/readme_extractor.py +485 -0
code_finder-0.1.0/src/claude_context/repository_adapter.py +399 -0
code_finder-0.1.0/src/claude_context/search.py +493 -0
code_finder-0.1.0/src/claude_context/skills/__init__.py +11 -0
code_finder-0.1.0/src/claude_context/skills/_cli_common.py +74 -0
code_finder-0.1.0/src/claude_context/skills/_index_manager.py +98 -0
code_finder-0.1.0/src/claude_context/skills/api_surface.py +219 -0
code_finder-0.1.0/src/claude_context/skills/evidence_retrieval.py +151 -0
code_finder-0.1.0/src/claude_context/skills/grounded_review.py +212 -0
code_finder-0.1.0/src/claude_context/synthesis/__init__.py +8 -0
code_finder-0.1.0/src/claude_context/synthesis/editor_agent.py +391 -0
code_finder-0.1.0/src/claude_context/synthesis/llm_synthesizer.py +153 -0
code_finder-0.1.0/src/claude_context/synthesis/logic_explainer.py +235 -0
code_finder-0.1.0/src/claude_context/synthesis/multi_review_pipeline.py +717 -0
code_finder-0.1.0/src/claude_context/synthesis/prompt_builder.py +439 -0
code_finder-0.1.0/src/claude_context/synthesis/providers.py +115 -0
code_finder-0.1.0/src/claude_context/synthesis/validators.py +458 -0
code_finder-0.1.0/src/code_finder.egg-info/PKG-INFO +823 -0
code_finder-0.1.0/src/code_finder.egg-info/SOURCES.txt +42 -0
code_finder-0.1.0/src/code_finder.egg-info/dependency_links.txt +1 -0
code_finder-0.1.0/src/code_finder.egg-info/entry_points.txt +4 -0
code_finder-0.1.0/src/code_finder.egg-info/requires.txt +18 -0
code_finder-0.1.0/src/code_finder.egg-info/top_level.txt +1 -0
code_finder-0.1.0/tests/test_all_components.py +378 -0
code_finder-0.1.0/tests/test_docstring_indexer.py +41 -0

code_finder-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,823 @@
+Metadata-Version: 2.4
+Name: code-finder
+Version: 0.1.0
+Summary: Code evidence retrieval and grounded review for documentation workflows. AST chunking, hybrid search (BM25 + vector), and API surface extraction.
+License-Expression: Apache-2.0
+Keywords: documentation,code analysis,code evidence,semantic search,ast,embeddings
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: pymilvus>=2.3.0
+Requires-Dist: milvus-lite>=2.3.0
+Requires-Dist: sentence-transformers>=2.2.0
+Requires-Dist: rank-bm25
+Requires-Dist: numpy>=1.24.0
+Requires-Dist: tqdm>=4.65.0
+Requires-Dist: tree-sitter
+Requires-Dist: tree-sitter-python
+Requires-Dist: tree-sitter-javascript
+Requires-Dist: tree-sitter-typescript
+Requires-Dist: tree-sitter-go
+Provides-Extra: synthesis
+Requires-Dist: anthropic>=0.34.0; extra == "synthesis"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+# Vibe2Doc
+AI-powered documentation generation system that creates grounded, evidence-based documentation from your codebase using semantic search and LLM synthesis.
+Vibe2Doc is a command-line tool that generates comprehensive documentation directly from your terminal. For higher quality results, choose between:
+- **Multi-Review Pipeline** (`--multi-review`): Fully automated writer + 3 specialist reviewers (91% accuracy, ~30 min)
+- **Two-Pass Workflow** (`--review`): Manual IDE refinement with Cursor/Claude Code for final polish
+## 🚀 Key Features
+- **Evidence-Driven Documentation**: Prioritizes README essentials (install, quickstart) before code internals
+- **Multi-README Support**: Automatically discovers READMEs throughout your repository (root + subdirectories)
+- **LLM-Powered**: Uses Claude Sonnet 4 to synthesize complete documentation
+- **Mandatory Validation**: Fails if essential content (installation, quickstart) is missing
+- **Two Modes**: Automatic (fast) or Interactive (with Q&A for gaps)
+- **Quality Enhancement Options**:
+  - **Multi-Review Pipeline** (`--multi-review`): Automated writer + 3 specialist reviewers with fresh-eyes isolation and regression checks
+  - **Two-Pass Workflow** (`--review`): Manual IDE refinement for final polish with full codebase context
+- **Semantic Search**: AST-based code chunking with hybrid BM25 + vector search
+- **Context Integration**: Include requirements, design docs, and other context files
+## 📋 Quick Start
+### Installation
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/vibe2doc.git
+cd vibe2doc
+# Create and activate virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+# Install dependencies
+pip install -r requirements-claude-context.txt
+# Set up environment variables (for LLM synthesis)
+cp .env.example .env
+# Edit .env and add your API keys:
+# ANTHROPIC_API_KEY=your_key_here
+# OPENAI_API_KEY=your_key_here  (optional)
+```
+### Clone the Repository You Want to Document
+Before generating documentation, you need to clone the GitHub project you want to document:
+1. Navigate to the GitHub repository you want to document
+2. Clone it to your local machine:
+```bash
+# Option 1: Clone into the same parent directory as vibe2doc
+cd /path/to/parent-directory
+git clone https://github.com/owner/target-repo.git
+# Option 2: Clone to any location
+git clone https://github.com/owner/target-repo.git /path/to/your/preferred/location
+```
+**Note**: When running `generate_docs.py`, you'll provide the path to this cloned repository. If you cloned it next to vibe2doc, you can use a relative path like `../target-repo`. Otherwise, use the full absolute path.
+### Basic Usage
+```bash
+# Automatic mode (default) - Fast, complete documentation
+python generate_docs.py /path/to/repo --output ./docs
+# With context files (recommended for better docs)
+python generate_docs.py /path/to/repo \
+  --context requirements.md design.md \
+  --output ./docs
+# Interactive mode - Ask targeted questions to fill gaps
+python generate_docs.py /path/to/repo \
+  --interactive \
+  --context requirements.md \
+  --output ./docs
+# User-focused (quickstart-first) vs technical (architecture-first)
+python generate_docs.py /path/to/repo \
+  --user-focused \
+  --output ./docs
+# With editor review (manual IDE refinement) - LLM critiques and refines each section
+python generate_docs.py /path/to/repo \
+  --review \
+  --output ./docs
+# With multi-review pipeline (automated) - Writer + 3 specialist reviewers
+python generate_docs.py /path/to/repo \
+  --multi-review \
+  --output ./docs
+# With custom synthesis template
+python generate_docs.py /path/to/repo \
+  --synthesis-template ./templates/synthesis_universal.yaml \
+  --output ./docs
+# Use only root README (legacy behavior, skips subdirectory READMEs)
+python generate_docs.py /path/to/repo \
+  --single-readme \
+  --output ./docs
+```
+### Output
+Vibe2Doc generates comprehensive documentation:
+- `SYNTHESIZED_GETTING_STARTED.md` - Installation, quick start, and first-run guidance
+- `SYNTHESIZED_TERMINOLOGY.md` - Glossary of domain-specific terms and acronyms
+- `SYNTHESIZED_CONCEPTS.md` - How the system works, architecture, mental models
+- `SYNTHESIZED_SCHEMES_AND_PATTERNS.md` - High-level workflows, recipes, and decision points
+- `SYNTHESIZED_HOW_TO_GUIDES.md` - Scenario-driven how-to guides with step-by-step commands
+- `SYNTHESIZED_API_REFERENCE.md` - Core classes, functions, and CLI commands
+- `SYNTHESIZED_TROUBLESHOOTING.md` - Common issues, error messages, and solutions
+- `SYNTHESIZED_SOURCES.md` - Citations and evidence transparency
+- `SYNTHESIZED_TUTORIALS.md` - Learning-oriented walkthroughs (if enabled)
+- `SYNTHESIZED_DEVELOPER_GUIDE.md` - Contributor setup and guidelines (if enabled)
+- `REQUIREMENTS_TRACING.md` - Requirements mapping (if context files provided)
+- `editor/` - Editor-reviewed revisions (when using `--review` flag)
+**Editor Review Pass** (with `--review` flag):
+After initial synthesis, an LLM-powered editor agent critiques each section, identifies factual errors or missing rationale, and proposes refined versions. The editor uses the same hybrid search infrastructure to ground all revisions in repository evidence. Refined drafts are saved to the `editor/` subdirectory.
+**Note**: All files are LLM-synthesized with evidence-based validation
+## 📖 Documentation Modes
+### Two-Pass Workflow (with `--review` flag)
+Combine automatic synthesis with an editor agent that critiques the draft, then refine using your IDE's AI assistant (Cursor Composer or Claude Code) which has full codebase context:
+1. **Auto Synthesis** – Run `generate_docs.py` to collect evidence and produce `SYNTHESIZED_*.md` files with citations.
+2. **Editor Review** – Add the `--review` flag to invoke the editor agent:
+   ```bash
+   python generate_docs.py /path/to/repo --review --output ./docs
+   ```
+   The editor reviews each section, identifies issues (factual errors, missing rationale, style gaps), and generates a single combined prompt covering all sections.
+3. **IDE Refinement** – Use the generated prompt with your IDE's AI assistant:
+   - The combined prompt is automatically copied to your clipboard
+   - Open your project in Cursor or VS Code with Claude Code
+   - Paste the prompt (Cmd+V on macOS, Ctrl+V on Windows/Linux) into Composer or Claude Code chat
+   - The AI will refine all documentation files using its full understanding of your codebase
+4. **Continue the Conversation** – After the initial fixes, you can continue chatting with your IDE's AI to:
+   - Ask questions: "Can you explain the authentication flow in more detail?"
+   - Request enhancements: "Add a troubleshooting section for common errors"
+   - Fill gaps: "The API reference is missing the batch processing endpoint"
+5. **Publishing** – Use `scripts/prepare_docs.py` to copy the refined output into `docs/generated/` for MkDocs preview.
+**Why not just use Cursor/Claude Code directly?** You could ask your IDE to write documentation from scratch, but you'd get generic, surface-level content. Vibe2Doc provides critical value that the IDE refinement builds upon:
+| What Vibe2Doc Provides | Why It Matters |
+|------------------------|----------------|
+| **Structured evidence collection** | Systematically extracts installation commands, quickstart examples, and API patterns - ensuring nothing essential is missed |
+| **Rationale extraction** | LogicExplainer captures the *why* behind code decisions - trade-offs, side effects, design intent that aren't obvious from code alone |
+| **Validated first draft** | The synthesized docs are already grounded in evidence with citations, giving the IDE a solid foundation to refine rather than invent |
+| **Targeted review prompts** | EditorAgent identifies specific issues (not vague suggestions), so the IDE knows exactly what to fix |
+**The result**: Your IDE's AI refines *evidence-based documentation* rather than hallucinating from scratch. It fills gaps and improves style while the factual foundation remains grounded in your actual codebase.
+Use this mode when you want the highest quality documentation with accurate, context-aware refinements.
+### Multi-Review Refinement (Two-Step Workflow) ⭐ RECOMMENDED
+**The most reliable workflow**: Generate high-quality docs with `--review`, then refine them in Claude Code with the `/multi-review` skill.
+#### Step 1: Generate Base Documentation (Fast & Reliable)
+```bash
+python3 generate_docs.py /path/to/repo --review --output ./docs
+```
+**Runtime**: 15-20 minutes
+**Accuracy**: 85-97% (excellent quality, ready to publish)
+**No timeouts**: Stable, proven workflow
+#### Step 2: Multi-Review Refinement in Claude Code (Optional Polish)
+If you want to push accuracy to 95-99% and remove boilerplate repetition:
+```bash
+# In Claude Code chat or terminal
+/multi-review docs
+```
+**What the `/multi-review` skill does**:
+1. **Reviewer A (Technical Accuracy)** – Batch-reviews 5 docs at a time
+   - Verifies code examples, function signatures, file paths
+   - Checks import statements and API references
+   - Validates citations point to real files
+2. **Reviewer B (Boilerplate Removal)** ⭐ **PRIMARY FOCUS**
+   - **Removes duplicate Installation/Quickstart/Citation blocks**
+   - Ensures ONLY `SYNTHESIZED_GETTING_STARTED.md` has full setup instructions
+   - Replaces duplicate content with cross-references:
+     `"For installation, see [Getting Started](../SYNTHESIZED_GETTING_STARTED)"`
+   - Checks learning flow (beginner → advanced)
+3. **Reviewer C (Consistency + Safety Check)**
+   - Verifies naming consistency, citation format, cross-reference links
+   - Final check for any remaining boilerplate duplication
+   - Catches regressions introduced by previous reviewers
+**Runtime**: 15-25 minutes (batched to avoid timeouts)
+**Output**: Refined docs + `REVIEW_LOG.json` audit trail
+**Accuracy improvement**: 85-97% → 95-99%
+#### Why Two Steps Are Better Than the Automated Pipeline
+| Aspect | Automated `--multi-review` | Two-Step Workflow |
+|--------|---------------------------|-------------------|
+| **Base generation** | ❌ Timeouts on large repos | ✅ Always completes (15-20 min) |
+| **Refinement** | ❌ Timeouts, expensive | ✅ Claude Code handles it well |
+| **Cost** | $$$ (many long API calls) | $ (base) + $$ (refinement when needed) |
+| **Control** | ❌ Black box, can't intervene | ✅ Monitor, stop, resume |
+| **Failure recovery** | ❌ Start over | ✅ Continue from where it stopped |
+| **Repo context** | ⚠️ Via searcher | ✅ Direct Read/Edit/Write tools |
+#### When to Use Multi-Review Refinement
+✅ **Use `/multi-review` skill when**:
+- You see duplicate quickstart/installation sections across docs
+- You want 95-99% accuracy (vs 85-97% from base generation)
+- You need an audit trail of review decisions (`REVIEW_LOG.json`)
+- You're documenting workflows, architecture, tutorials
+❌ **Skip refinement when**:
+- Base docs are already 95%+ accurate (run validation to check)
+- You're generating API reference (use `mkdocstrings` instead)
+- Time is critical (base docs are already high quality)
+#### Example Workflow
+```bash
+# 1. Generate base docs (fast, reliable)
+python3 generate_docs.py speculators --review --output ./docs
+# 2. Validate accuracy
+python3 validate_documentation_accuracy.py docs/ speculators/
+# Result: 96.8% accuracy ✅
+# 3. (Optional) If you want to remove boilerplate and push to 98%+
+# In Claude Code:
+/multi-review docs
+# 4. Validate again
+python3 validate_documentation_accuracy.py docs/ speculators/
+# Result: 98.5% accuracy ✅
+```
+**Result**: High-quality documentation with no timeouts, full control, and optional refinement when needed.
+**Comparison with `--review` flag**:
+| Aspect | `--review` (Manual) | `--multi-review` (Automated) |
+|--------|---------------------|------------------------------|
+| **Interaction** | Manual IDE refinement | Fully automated |
+| **Time** | 15-45 min (depends on IDE work) | ~30 min (automatic) |
+| **Quality** | Highest (human + AI context) | Very good (91% accuracy) |
+| **Best for** | Final polish, gap filling | Conceptual docs, tutorials |
+| **Audit trail** | IDE chat history | REVIEW_LOG.json |
+| **Repeatability** | Manual steps | Fully reproducible |
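+**Alternative workflow**: If the automated pipeline completes reliably on your repository, use `--multi-review` for automated baseline quality, then optionally use `--review` for final human-guided refinement if needed. On large repositories, where `--multi-review` can time out, prefer the two-step workflow (`--review` followed by the `/multi-review` skill) described earlier in this section.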
+### Automatic Mode (Default)
+Fast, automatic documentation generation from code and README.
+- **Use when**: You want complete documentation quickly
+- **Process**:
+  1. Extracts essentials from README (install, quickstart, auth)
+  2. Analyzes code (architecture, API, patterns)
+  3. Synthesizes complete documentation with LLM
+- **Output**: Complete docs with installation, quickstart, architecture, API
+- **Time**: 5-10 minutes
+### Interactive Mode
+Guided documentation with targeted Q&A to fill gaps.
+- **Use when**: You want to add design insights and context
+- **Process**:
+  1. Auto-extracts what it can (same as Automatic)
+  2. Shows coverage: what's documented vs what's missing
+  3. Asks targeted questions to fill gaps
+  4. Generates complete documentation
+- **Output**: Complete docs + human insights
+- **Time**: 15-30 minutes
+## 🎯 Validating Documentation Accuracy
+Vibe2Doc includes an **automated accuracy validation tool** that objectively tests generated documentation against your codebase.
+### Quick Validation
+```bash
+# Validate documentation accuracy
+python3 validate_documentation_accuracy.py docs/ /path/to/repo/
+# With detailed JSON report
+python3 validate_documentation_accuracy.py \
+  docs/ \
+  /path/to/repo/ \
+  --output accuracy_report.json
+```
+### What It Tests
+The validator automatically checks **7 categories** of accuracy:
+| Category | Tests | Example |
+|----------|-------|---------|
+| **Installation Commands** | `pip install` syntax validity | ✅ `pip install speculators` |
+| **File Paths** | Referenced files exist in repo | ✅ `scripts/data_generation.py` exists |
+| **Import Statements** | Modules/functions are real | ✅ `from pkg.module import func` works |
+| **API Signatures** | Functions mentioned exist | ✅ `load_model_layers()` found in source |
+| **Citations** | `[CITE:file]` points to real files | ✅ All citation targets exist |
+| **Code Examples** | Python syntax is valid | ✅ Code blocks parse correctly |
+| **Command Syntax** | No typos in bash commands | ✅ Commands have correct syntax |
+### Sample Output
+```
+================================================================================
+📊 DOCUMENTATION ACCURACY REPORT
+================================================================================
+Overall Accuracy: 92.8%
+Total Tests: 209
+✅ Passed: 194
+❌ Failed: 15
+By Category:
+--------------------------------------------------------------------------------
+  citations            122/122 (100.0%) ✅
+  code_examples          6/  6 (100.0%) ✅
+  commands              45/ 45 (100.0%) ✅
+  file_paths            11/ 19 ( 57.9%) ❌
+  imports                2/  9 ( 22.2%) ❌
+  installation           8/  8 (100.0%) ✅
+```
+### Accuracy Grading
+| Score | Grade | Action |
+|-------|-------|--------|
+| 95-100% | A+ | ✅ Production ready |
+| 90-94% | A | ✅ Publish (multi-review achieves this) |
+| 85-89% | B+ | ⚠️ Review failures, fix critical |
+| 80-84% | B | ⚠️ Fix critical/major issues |
+| <80% | C-F | ❌ Fix before publishing |
+**Multi-review pipeline achievement**: 92.8% accuracy (A grade)
+### CI/CD Integration
+The validator exits with code 0 if accuracy ≥ 80%, making it perfect for CI/CD:
+```bash
+# Quality gate in your workflow
+python3 generate_docs.py repo/ --multi-review --output docs/ && \
+python3 validate_documentation_accuracy.py docs/ repo/
+if [ $? -eq 0 ]; then
+  echo "✅ Documentation validated - ready to publish"
+else
+  echo "❌ Accuracy below threshold - review errors"
+  exit 1
+fi
+```
+For detailed guidance, see [ACCURACY_VALIDATION_GUIDE.md](ACCURACY_VALIDATION_GUIDE.md).
+## 📚 Multi-README Support
+Vibe2Doc automatically discovers and incorporates README files from throughout your repository:
+### How It Works
+- **Root README** (highest priority): Used for installation, quickstart, and authentication essentials
+- **Module READMEs**: Provide context for specific subsystems, incorporated into How-To Guides
+- **Automatic discovery**: No need to manually specify context files for READMEs
+- **Smart filtering**: Ignores READMEs in `node_modules/`, `venv/`, `build/`, `.git/`, etc.
+### Example Repository Structure
+```
+myproject/
+├── README.md              # Main project overview, installation (→ Getting Started)
+├── src/
+│   ├── api/
+│   │   └── README.md      # API-specific examples, endpoints (→ How-To Guides)
+│   └── cli/
+│       └── README.md      # CLI command reference (→ How-To Guides)
+├── examples/
+│   └── README.md          # Usage examples (→ How-To Guides)
+└── .v2dignore             # Optional: exclude specific paths
+```
+### Limits and Safeguards
+| Limit | Value | Purpose |
+|-------|-------|---------|
+| Max depth | 4 levels | Avoid deeply nested READMEs |
+| Max READMEs | 15 | Prevent context explosion |
+| Max per-file | 10K chars | Truncate oversized READMEs |
+| Max total | 100K chars | Protect token budget |
+| Min size | 200 chars | Skip stubs |
+### Custom Ignore Patterns (.v2dignore)
+Create a `.v2dignore` file in your repository root to exclude specific READMEs:
+```bash
+# .v2dignore - gitignore-style patterns
+examples/legacy/     # Ignore legacy examples directory
+vendor/              # Ignore vendored dependencies
+**/internal/*        # Ignore internal module READMEs
+```
+### Legacy Mode
+Use `--single-readme` to revert to the previous behavior (only root README):
+```bash
+python generate_docs.py /path/to/repo --single-readme --output ./docs
+```
+## ⚙️ Configuration
+### Environment Variables
+```bash
+# REQUIRED: API key for LLM synthesis (documentation generation requires this)
+ANTHROPIC_API_KEY=sk-ant-...  # Claude Sonnet 4
+# Optional configuration
+VIBE2DOC_MODEL=claude-sonnet-4-5@20250929
+VIBE2DOC_TEMPERATURE=0.2
+VIBE2DOC_MAX_TOKENS=1200
+```
+### Synthesis Templates
+Vibe2Doc uses `synthesis_universal.yaml` by default, which provides comprehensive documentation structure based on the [Diátaxis framework](https://diataxis.fr/). You can customize synthesis behavior by creating your own YAML template:
+```bash
+# Use the default universal template
+python generate_docs.py /path/to/repo --output ./docs
+# Use a custom template
+python generate_docs.py /path/to/repo \
+  --synthesis-template ./templates/my_custom_template.yaml \
+  --output ./docs
+```
+#### Documentation Content Types
+The template generates different types of documentation, each serving a distinct purpose:
+| Content Type | Purpose | User Question Answered |
+|--------------|---------|------------------------|
+| **Getting Started** | Quick path to first success | "How do I install and run this?" |
+| **Terminology** | Glossary of domain terms | "What does X mean?" |
+| **Concepts** | Understanding how it works | "How does this system work?" |
+| **Schemes & Patterns** | Decision guidance | "Which approach should I use?" |
+| **How-To Guides** | Task completion | "How do I accomplish X?" |
+| **API Reference** | Complete, accurate details | "What are the exact parameters?" |
+| **Troubleshooting** | Problem resolution | "Why isn't this working?" |
+| **Tutorials** *(optional)* | Guided learning | "Teach me how to use this" |
+| **Developer Guide** *(optional)* | Contribution guidance | "How do I contribute?" |
+#### Core Sections (Always Generated)
+These sections are generated by default for every project:
+- **Getting Started** (2500 words) - Installation, prerequisites, quickstart example, navigation hints
+- **Terminology** (800 words) - Glossary of domain-specific terms and acronyms
+- **Concepts** (2500 words) - How the system works, architecture overview, mental models
+- **Schemes and Patterns** (2500 words) - Workflows, decision guides, compatibility tables
+- **How-To Guides** (3000 words) - Step-by-step task completion guides
+- **API Reference** (3500 words) - Classes, functions, CLI commands with parameter tables
+- **Troubleshooting** (1500 words) - Common issues, error messages, solutions
+- **Sources** (400 words) - Citations and evidence transparency
+#### Optional Sections
+These sections are disabled by default but can be enabled for projects that need them.
+##### Tutorials (Learning-Oriented)
+**When to enable**: Projects with significant learning curves where users need guided exploration.
+**Tutorial vs How-To Guide**:
+- **Tutorial**: "Learn speculative decoding by building a simple speculator" - teaches concepts through doing, explains *why* at each step
+- **How-To**: "Deploy a speculator to vLLM" - assumes understanding, focuses on task completion
+**Enable when**:
+- Project has complex concepts that benefit from step-by-step learning
+- There are example projects, notebooks, or demos in the codebase
+- README mentions "tutorial", "learn", or "getting started guide"
+- New users frequently struggle with the same conceptual hurdles
+**How to enable**: Edit `synthesis_universal.yaml`:
+```yaml
+- name: Tutorials
+  enabled: true  # Change from false to true
+  order: 9
+  max_words: 3000
+```
+##### Developer Guide (Contributor-Oriented)
+**When to enable**: Open-source projects that accept external contributions.
+**Enable when**:
+- Project has a CONTRIBUTING.md or contribution guidelines
+- There's a test suite, CI/CD pipeline, or code quality tools
+- You want to onboard new contributors efficiently
+- The development setup differs from user installation
+**What it covers**:
+- Development environment setup (clone, install dev dependencies)
+- Running tests (unit, integration, e2e)
+- Code style and quality requirements (linting, formatting, pre-commit hooks)
+- Pull request process and CI/CD checks
+**How to enable**: Edit `synthesis_universal.yaml`:
+```yaml
+- name: Developer Guide
+  enabled: true  # Change from false to true
+  order: 10
+  max_words: 2000
+```
+#### Word Count Guidelines
+Word counts are tuned for typical projects but can be adjusted:
+| Section | Default | Increase If... |
+|---------|---------|----------------|
+| API Reference | 3500 | Large API surface (50+ endpoints) |
+| How-To Guides | 3000 | Many distinct use cases (5+) |
+| Concepts | 2500 | Complex architecture |
+| Tutorials | 3000 | Multi-step learning paths |
+**Customizing Templates**: See [templates/README.md](templates/README.md) for detailed guidance on creating and modifying synthesis templates, including section structure, word limits, citation requirements, and validation rules.
+## 🏗️ Architecture
+Vibe2Doc combines semantic code search with LLM-powered agents that work together to generate accurate, evidence-based documentation.
+### Agent Modules
+| Agent | Purpose |
+|-------|---------|
+| **LogicExplainer** | Analyzes code units to extract design rationale, trade-offs, and side effects. Provides the "why" behind code decisions. |
+| **LLMSynthesizer** | Orchestrates documentation generation using templates. Builds prompts with evidence, generates sections, and validates output against rules. |
+| **EditorAgent** | Reviews synthesized documentation against repository evidence. Identifies factual errors, missing rationale, and generates improvement prompts for IDE refinement. |
+### How the Agents Work Together
+```mermaid
+flowchart TD
+    subgraph Input
+        A[Repository] --> B[Indexer]
+        A --> C[README Extractor]
+    end
+    subgraph Evidence Collection
+        B --> D[Hybrid Search<br/>BM25 + Vector]
+        C --> E[Essentials<br/>Install/Quickstart]
+        D --> F[Code Patterns]
+        F --> G[LogicExplainer]
+        G --> H[Rationale Evidence]
+    end
+    subgraph Synthesis
+        E --> I[LLMSynthesizer]
+        F --> I
+        H --> I
+        I --> J[SYNTHESIZED_*.md<br/>Draft Documentation]
+    end
+    subgraph Review & Refinement
+        J --> K[EditorAgent]
+        H --> K
+        K --> L{Issues Found?}
+        L -->|Yes| M[Generate IDE Prompt]
+        L -->|No| N[Final Documentation]
+        M --> O[Cursor / Claude Code]
+        O --> N
+    end
+    style A fill:#e1f5fe
+    style N fill:#c8e6c9
+    style O fill:#fff3e0
+```
+### Core Infrastructure
+- **AST-based Chunking**: Semantic parsing of code into meaningful units
+- **Hybrid Search**: BM25 (keyword) + vector (semantic) search
+- **Evidence-Based Generation**: All claims grounded in actual code
+- **Validation Layer**: Ensures citations, word limits, and required elements
+- **Fail-Fast Design**: Clear errors when evidence is insufficient
+See [Architecture Documentation](docs/architecture/MODE_ARCHITECTURE_COMPARISON.md) for details.
+## 💡 Why Vibe2Doc is Different
+Vibe2Doc prioritizes **completeness and accuracy** over generic descriptions:
+- **Evidence-First**: Extracts installation/quickstart from README BEFORE analyzing code internals
+- **Mandatory Validation**: FAILS if essential content (install, quickstart) is missing from output
+- **No Paraphrasing**: Shows exact commands and code examples ("pip install X", not "can be installed")
+- **Structured**: Follows strict priority: Essentials → Usage → Architecture → Internals
+- **Auditable**: Every claim cites specific source (README or code location)
+**Key Design Principle**: Users need to know HOW TO USE software before understanding how it works internally.
+Vibe2Doc ensures documentation includes:
+1. Installation commands (MANDATORY if in README)
+2. Quickstart code example (MANDATORY if in README)
+3. Authentication setup (if applicable)
+4. THEN architecture and internals
+## 🔧 Troubleshooting
+### Common Issues
+**API key errors**
+- Create a `.env` file with `ANTHROPIC_API_KEY=your_key` or `OPENAI_API_KEY=your_key`
+- API key is REQUIRED (LLM synthesis is mandatory for documentation)
+- Check API key has sufficient credits/quota
+**Missing installation or quickstart in output**
+- This should NOT happen - validation will fail if essentials are missing
+- If validation passed but content is missing, file a bug
+- Check that README actually contains installation/quickstart sections
+**Verbose logging**
+- Use `--verbose` flag for detailed logs
+- Use `python -m pdb generate_docs.py` for debugging
+**Import errors**
+- Ensure your virtual environment is activated
+- Run: `pip install -r requirements-claude-context.txt`
+- Python 3.10+ required
+**Memory/performance issues**
+- Use .gitignore to exclude large directories
+- Limit repository scope to specific subdirectories
+- Reduce chunk size in configuration
+## 📚 Publishing Documentation
+This section walks through the complete flow from generation to publishing on GitLab/GitHub Pages.
+### Complete Workflow: From Generation to Publishing
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│  PROJECTS/                                                                   │
+│  ├── vibe2doc_withAgents/          ← Main repo                              │
+│  │   ├── docs_speculators/         ← Step 1: Generated docs                 │
+│  │   ├── docs/generated/speculators/ ← Step 2: Staged (gitignored)          │
+│  │   └── scripts/                                                           │
+│  │       ├── prepare_docs.py       ← Copies to docs/generated/              │
+│  │       └── publish_docs.py       ← Copies to ../docs-output/              │
+│  │                                                                          │
+│  └── docs-output/                  ← Step 4: External GitLab Pages repo     │
+│      ├── mkdocs.yml                ← Auto-generated                         │
+│      ├── .gitlab-ci.yml            ← Auto-generated                         │
+│      └── docs/speculators/         ← Published docs                         │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+#### Step 1: Generate and Refine Documentation
+```bash
+# Generate docs with editor review
+python generate_docs.py /path/to/speculators --review --output ./docs_speculators
+# Prompt is copied to clipboard - paste into Cursor/Claude Code
+# Make additional refinements in the IDE conversation
+```
+**Output**: `./docs_speculators/SYNTHESIZED_*.md` (refined by IDE)
+#### Step 2: Stage for Local Preview
+```bash
+python scripts/prepare_docs.py
+```
+This copies `docs_speculators/*.md` → `docs/generated/speculators/` (gitignored)
+#### Step 3: Preview Locally (Optional)
+```bash
+mkdocs serve
+```
+Navigate to <http://127.0.0.1:8000> to preview before publishing.
+#### Step 4: Publish to External Repo
+```bash
+# Publish a single project
+python3 scripts/publish_docs.py --project speculators
+# Or publish all available projects
+python3 scripts/publish_docs.py --all
+# List available projects
+python3 scripts/publish_docs.py --list
+```
+This:
+- Copies docs to `PROJECTS/docs-output/docs/speculators/`
+- Auto-generates `mkdocs.yml` with navigation
+- Creates `.gitlab-ci.yml` for GitLab Pages (first run only)
+#### Step 5: Deploy to GitLab Pages
+```bash
+cd ../docs-output
+git add .
+git commit -m "Update speculators documentation"
+git push  # Triggers GitLab Pages deployment
+```
+### Quick Reference: File Locations
+| Stage | Location | Git Status |
+|-------|----------|------------|
+| Generated docs (raw) | `./docs_<project>/` | gitignored |
+| Staged for preview | `./docs/generated/<project>/` | gitignored |
+| Published docs | `../docs-output/docs/<project>/` | separate repo |
+| Publishing config | `../docs-output/mkdocs.yml` | auto-generated |
+### Setting Up the External docs-output Repo
+First-time setup for GitLab Pages:
+```bash
+cd PROJECTS
+mkdir docs-output
+cd docs-output
+git init
+git remote add origin git@gitlab.com:yourusername/docs-output.git
+# Run publish_docs.py to generate initial files
+cd ../vibe2doc_withAgents
+python scripts/publish_docs.py --project speculators
+# Push to GitLab
+cd ../docs-output
+git add .
+git commit -m "Initial documentation"
+git push -u origin main
+```
+GitLab Pages will automatically build and deploy from the `.gitlab-ci.yml`.
+## 🛠️ Development
+### Project Structure
+```
+vibe2doc/
+├── src/                         # Source code
+│   ├── claude_context/         # Core documentation engine
+│   │   ├── synthesis/          # LLM synthesis and editor agent
+│   │   ├── indexer.py          # Hybrid search indexing
+│   │   ├── search.py           # BM25 + vector search
+│   │   ├── readme_extractor.py # README essentials extraction
+│   │   └── mode_handler.py     # Auto/Interactive modes
+│   ├── analysis/               # Code analysis utilities
+│   ├── core/                   # Infrastructure (LLM providers, MCP)
+│   ├── generation/             # Documentation generation
+│   ├── utilities/              # Helper utilities
+│   └── validation/             # Quality control
+├── templates/                   # Synthesis YAML templates
+├── docs/                       # Project documentation
+├── scripts/                    # Helper scripts (prepare_docs.py, etc.)
+└── requirements*.txt           # Dependencies
+```
+## 🤝 Contributing
+This project is under active development. For contribution guidelines and development setup, see the documentation in `docs/development/`.
+## 📄 License
+Licensed under the Apache License 2.0 (`Apache-2.0`), as declared in the package metadata.