PyPI - devmind-cli - Versions diffs - 0.1.0__tar.gz - Mend

devmind-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

devmind_cli-0.1.0/PKG-INFO +166 -0
devmind_cli-0.1.0/README.md +136 -0
devmind_cli-0.1.0/devmind/__init__.py +2 -0
devmind_cli-0.1.0/devmind/cli.py +285 -0
devmind_cli-0.1.0/devmind/ingestion/comment_extractor.py +91 -0
devmind_cli-0.1.0/devmind/ingestion/file_reader.py +92 -0
devmind_cli-0.1.0/devmind/ingestion/git_parser.py +72 -0
devmind_cli-0.1.0/devmind/integrations/claude_code.py +44 -0
devmind_cli-0.1.0/devmind/memory.py +261 -0
devmind_cli-0.1.0/devmind/web/app.py +86 -0
devmind_cli-0.1.0/devmind_cli.egg-info/PKG-INFO +166 -0
devmind_cli-0.1.0/devmind_cli.egg-info/SOURCES.txt +16 -0
devmind_cli-0.1.0/devmind_cli.egg-info/dependency_links.txt +1 -0
devmind_cli-0.1.0/devmind_cli.egg-info/entry_points.txt +2 -0
devmind_cli-0.1.0/devmind_cli.egg-info/requires.txt +16 -0
devmind_cli-0.1.0/devmind_cli.egg-info/top_level.txt +1 -0
devmind_cli-0.1.0/pyproject.toml +51 -0
devmind_cli-0.1.0/setup.cfg +4 -0

devmind_cli-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,166 @@
+Metadata-Version: 2.4
+Name: devmind-cli
+Version: 0.1.0
+Summary: DevMind - Semantic Codebase Memory and Agentic Search for Developers
+Author: Anishp-cell
+Project-URL: Homepage, https://github.com/Anishp-cell/devmind-CLI
+Project-URL: Issues, https://github.com/Anishp-cell/devmind-CLI/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Development Status :: 3 - Alpha
+Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: cognee[fastembed]>=0.1.0
+Requires-Dist: typer[all]>=0.9.0
+Requires-Dist: fastapi>=0.100.0
+Requires-Dist: uvicorn>=0.22.0
+Requires-Dist: gitpython>=3.1.30
+Requires-Dist: fastmcp>=0.1.0
+Requires-Dist: python-dotenv>=1.0.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: jinja2>=3.1.2
+Requires-Dist: groq>=0.9.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: black>=23.0; extra == "dev"
+Requires-Dist: isort>=5.12; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+# DevMind – Codebase Memory for Developers
+> "Your codebase finally has a memory."
+DevMind is a developer CLI tool and local web interface that gives your codebase a persistent, queryable memory powered by **Cognee**. It scans source files, git commit history, comments, and architectural decisions, building a hybrid graph-vector knowledge store. This persistent memory allows developers and AI coding assistants (via MCP) to query the codebase in plain English and carry context across infinite sessions.
+---
+## Features
+1. **One-Command Ingestion** (`devmind remember`): Scans the codebase, git logs, and code comments to feed `cognee.remember()`.
+2. **Plain-English Q&A** (`devmind ask "..."`): Uses `cognee.recall()` to retrieve grounded, context-aware answers from the memory graph.
+3. **Decision Logging** (`devmind log "..."`): Records Architecture Decision Records (ADRs) to capture design reasoning.
+4. **Memory Refresh** (`devmind refresh`): Automatically detects modified files, updates the graph, and runs `cognee.improve()`.
+5. **Surgical Forget** (`devmind forget --file ...`): Prunes specific file memory from the knowledge graph using `cognee.forget()`.
+6. **Claude Code MCP Server** (`devmind mcp`): Seamlessly integrates with Claude Code or Cursor via standard Model Context Protocol (MCP).
+7. **Local Dashboard UI** (`devmind dashboard`): Provides a clean visual panel showing memory status, search queries, and recent decisions.
+8. **Smart API Key Rotation**: Automatically detects, formats, and rotates between multiple Groq and OpenRouter API keys to balance rate limits on free-tier LLM access.
+---
+## Installation & Setup
+### 1. Clone the repository
+```bash
+git clone https://github.com/Anishp-cell/devmind-CLI.git
+cd devmind-CLI
+```
+### 2. Configure Environment Variables
+Copy `.env.example` to `.env` and fill in your keys:
+```bash
+cp .env.example .env
+```
+To run for **free**, configure your `.env` with a list of API keys for DevMind to rotate between:
+```env
+LLM_PROVIDER="groq"
+# Add a comma-separated list of Groq keys (gsk_...) and/or OpenRouter keys (sk-or-v1-...)
+# The CLI automatically load-balances and routes requests to the correct endpoints!
+GROQ_API_KEYS="gsk_key1,sk-or-v1-key2,gsk_key3"
+EMBEDDING_PROVIDER="fastembed"
+EMBEDDING_MODEL="BAAI/bge-small-en-v1.5"
+EMBEDDING_DIMENSIONS="384"
+```
+### 3. Install DevMind
+Install the package in editable mode:
+```bash
+pip install -e .
+```
+---
+## CLI Command Reference
+*   **Ingest Codebase**:
+    ```bash
+    devmind remember
+    ```
+*   **Ask a Question**:
+    ```bash
+    devmind ask "Why did we switch to redis for the queue?"
+    ```
+*   **Log an Architectural Decision (ADR)**:
+    ```bash
+    devmind log "Chose FastAPI for the web UI because it supports async routes natively."
+    ```
+*   **Refresh Changed Memory**:
+    ```bash
+    devmind refresh
+    ```
+*   **Forget a Specific File**:
+    ```bash
+    devmind forget --file devmind/web/app.py
+    ```
+*   **Wipe Local Database Cache**:
+    ```bash
+    devmind forget --all
+    ```
+*   **Launch Web Dashboard**:
+    ```bash
+    devmind dashboard --port 8000
+    ```
+*   **Start MCP Server**:
+    ```bash
+    devmind mcp
+    ```
+---
+## Running the Mock Demo Project
+To test DevMind on a smaller project without polluting your main repo:
+1. Navigate to the demo directory:
+   ```bash
+   cd examples/demo_project
+   ```
+2. Build the memory of the demo:
+   ```bash
+   devmind remember --dir .
+   ```
+3. Query its memory:
+   ```bash
+   devmind ask "What open TODO tasks are left in main.py?"
+   devmind ask "Why do we use SQLite according to our architecture decisions?"
+   ```
+---
+## Claude Code MCP Integration
+To connect Claude Code to DevMind's memory, add the server to your Claude MCP config:
+```bash
+claude mcp add devmind -- devmind mcp
+```
+Alternatively, configure your project-level `.mcp.json` file in your project root:
+```json
+{
+  "mcpServers": {
+    "devmind": {
+      "command": "devmind",
+      "args": ["mcp"]
+    }
+  }
+}
+```
+---
+## AI Assistant Declaration
+Per the rules of **The Hangover Part AI Hackathon**, this project declares the use of **Claude** (via the Antigravity IDE agent) as an AI pair programmer.

devmind_cli-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,136 @@
+# DevMind – Codebase Memory for Developers
+> "Your codebase finally has a memory."
+DevMind is a developer CLI tool and local web interface that gives your codebase a persistent, queryable memory powered by **Cognee**. It scans source files, git commit history, comments, and architectural decisions, building a hybrid graph-vector knowledge store. This persistent memory allows developers and AI coding assistants (via MCP) to query the codebase in plain English and carry context across infinite sessions.
+---
+## Features
+1. **One-Command Ingestion** (`devmind remember`): Scans the codebase, git logs, and code comments to feed `cognee.remember()`.
+2. **Plain-English Q&A** (`devmind ask "..."`): Uses `cognee.recall()` to retrieve grounded, context-aware answers from the memory graph.
+3. **Decision Logging** (`devmind log "..."`): Records Architecture Decision Records (ADRs) to capture design reasoning.
+4. **Memory Refresh** (`devmind refresh`): Automatically detects modified files, updates the graph, and runs `cognee.improve()`.
+5. **Surgical Forget** (`devmind forget --file ...`): Prunes specific file memory from the knowledge graph using `cognee.forget()`.
+6. **Claude Code MCP Server** (`devmind mcp`): Seamlessly integrates with Claude Code or Cursor via standard Model Context Protocol (MCP).
+7. **Local Dashboard UI** (`devmind dashboard`): Provides a clean visual panel showing memory status, search queries, and recent decisions.
+8. **Smart API Key Rotation**: Automatically detects, formats, and rotates between multiple Groq and OpenRouter API keys to balance rate limits on free-tier LLM access.
+---
+## Installation & Setup
+### 1. Clone the repository
+```bash
+git clone https://github.com/Anishp-cell/devmind-CLI.git
+cd devmind-CLI
+```
+### 2. Configure Environment Variables
+Copy `.env.example` to `.env` and fill in your keys:
+```bash
+cp .env.example .env
+```
+To run for **free**, configure your `.env` with a list of API keys for DevMind to rotate between:
+```env
+LLM_PROVIDER="groq"
+# Add a comma-separated list of Groq keys (gsk_...) and/or OpenRouter keys (sk-or-v1-...)
+# The CLI automatically load-balances and routes requests to the correct endpoints!
+GROQ_API_KEYS="gsk_key1,sk-or-v1-key2,gsk_key3"
+EMBEDDING_PROVIDER="fastembed"
+EMBEDDING_MODEL="BAAI/bge-small-en-v1.5"
+EMBEDDING_DIMENSIONS="384"
+```
+### 3. Install DevMind
+Install the package in editable mode:
+```bash
+pip install -e .
+```
+---
+## CLI Command Reference
+*   **Ingest Codebase**:
+    ```bash
+    devmind remember
+    ```
+*   **Ask a Question**:
+    ```bash
+    devmind ask "Why did we switch to redis for the queue?"
+    ```
+*   **Log an Architectural Decision (ADR)**:
+    ```bash
+    devmind log "Chose FastAPI for the web UI because it supports async routes natively."
+    ```
+*   **Refresh Changed Memory**:
+    ```bash
+    devmind refresh
+    ```
+*   **Forget a Specific File**:
+    ```bash
+    devmind forget --file devmind/web/app.py
+    ```
+*   **Wipe Local Database Cache**:
+    ```bash
+    devmind forget --all
+    ```
+*   **Launch Web Dashboard**:
+    ```bash
+    devmind dashboard --port 8000
+    ```
+*   **Start MCP Server**:
+    ```bash
+    devmind mcp
+    ```
+---
+## Running the Mock Demo Project
+To test DevMind on a smaller project without polluting your main repo:
+1. Navigate to the demo directory:
+   ```bash
+   cd examples/demo_project
+   ```
+2. Build the memory of the demo:
+   ```bash
+   devmind remember --dir .
+   ```
+3. Query its memory:
+   ```bash
+   devmind ask "What open TODO tasks are left in main.py?"
+   devmind ask "Why do we use SQLite according to our architecture decisions?"
+   ```
+---
+## Claude Code MCP Integration
+To connect Claude Code to DevMind's memory, add the server to your Claude MCP config:
+```bash
+claude mcp add devmind -- devmind mcp
+```
+Alternatively, configure your project-level `.mcp.json` file in your project root:
+```json
+{
+  "mcpServers": {
+    "devmind": {
+      "command": "devmind",
+      "args": ["mcp"]
+    }
+  }
+}
+```
+---
+## AI Assistant Declaration
+Per the rules of **The Hangover Part AI Hackathon**, this project declares the use of **Claude** (via the Antigravity IDE agent) as an AI pair programmer.

devmind_cli-0.1.0/devmind/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # DevMind: Codebase Memory for Developers
2	+ __version__ = "0.1.0"

devmind_cli-0.1.0/devmind/cli.py ADDED Viewed

@@ -0,0 +1,285 @@
+# pyrefly: ignore [missing-import]
+import typer
+import sys
+import asyncio
+import os
+import logging
+import warnings
+# Suppress ResourceWarning and DeprecationWarning from aiohttp/asyncio during garbage collection
+warnings.filterwarnings("ignore", category=ResourceWarning)
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+# Suppress Windows proactor event loop SSL bugs during shutdown
+if sys.platform == 'win32':
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+from devmind.memory import initialize_cognee, remember_content, recall_query, improve_memory, forget_memory
+from devmind.ingestion.file_reader import scan_codebase_files
+from devmind.ingestion.git_parser import get_git_history
+from devmind.ingestion.comment_extractor import get_codebase_comments
+# Setup logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("devmind.cli")
+def run_async(coro):
+    """
+    Run ``coro`` to completion on a freshly created event loop and return its result.
+
+    A new loop is created and installed as the current loop for every call,
+    and a custom exception handler is attached that drops a known set of
+    noisy shutdown errors (closed event loop, Win32 error 10038, SSL
+    transport teardown). Everything else is forwarded to the loop's default
+    exception handler. The loop is always closed before returning.
+
+    Args:
+        coro: The awaitable to execute.
+
+    Returns:
+        Whatever ``coro`` returns. Exceptions raised by ``coro`` propagate.
+    """
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    def silence_exceptions(loop, context):
+        # ``context`` is the dict asyncio passes to loop exception handlers.
+        exc = context.get("exception")
+        msg = context.get("message", "")
+        # Swallows Win32 10038/not-a-socket/Event loop is closed warnings during exit
+        # NOTE(review): the bare "socket" substring test also hides any other
+        # exception whose text happens to mention "socket" - confirm this is intended.
+        if (exc and ("Event loop is closed" in str(exc) or "10038" in str(exc) or "socket" in str(exc))) or "Event loop is closed" in msg or "SSL transport" in msg:
+            return
+        loop.default_exception_handler(context)
+    loop.set_exception_handler(silence_exceptions)
+    try:
+        return loop.run_until_complete(coro)
+    finally:
+        # Best-effort cleanup: finalize async generators, ignoring failures,
+        # so that the loop is closed no matter what happened above.
+        try:
+            loop.run_until_complete(loop.shutdown_asyncgens())
+        except Exception:
+            pass
+        loop.close()
+# Root Typer application; each @app.command() function in this module is
+# registered on it as a subcommand. Shell completion installation is disabled.
+app = typer.Typer(
+    name="devmind",
+    help="DevMind – Codebase Memory for Developers. Powered by Cognee.",
+    add_completion=False
+)
+async def remember_pipeline(directory: str):
+    """
+    Core async pipeline for scanning files, comments, and git logs,
+    and loading them into Cognee.
+    """
+    # 1. Scan the codebase files
+    files = scan_codebase_files(directory)
+    if not files:
+        typer.echo("No files found to ingest.")
+        return
+    typer.echo(f"Ingesting {len(files)} files into Cognee memory...")
+    # Ingest file contents
+    file_success = 0
+    for idx, file_data in enumerate(files, start=1):
+        rel_path = file_data["relative_path"]
+        content = file_data["content"]
+        tagged_content = f"File Path: {rel_path}\n---\n{content}"
+        dataset_name = rel_path.replace("/", "_").replace("\\", "_").replace(".", "_").replace(" ", "_")
+        logger.info(f"[{idx}/{len(files)}] Processing {rel_path}...")
+        success = await remember_content(tagged_content, dataset_name=dataset_name)
+        if success:
+            file_success += 1
+    typer.echo(f"Successfully remembered {file_success}/{len(files)} files.")
+    # 2. Extract and Ingest Git History
+    git_logs = get_git_history(directory, max_commits=20)
+    if git_logs:
+        typer.echo(f"Ingesting git history ({len(git_logs)} commits) into Cognee...")
+        git_success = 0
+        for idx, commit_log in enumerate(git_logs, start=1):
+            dataset_name = f"git_commit_{idx}"
+            success = await remember_content(commit_log, dataset_name=dataset_name)
+            if success:
+                git_success += 1
+        typer.echo(f"Successfully remembered {git_success}/{len(git_logs)} commits.")
+    # 3. Extract and Ingest Inline Comments & Docstrings
+    relative_paths = [f["relative_path"] for f in files]
+    comments = get_codebase_comments(directory, relative_paths)
+    if comments:
+        typer.echo(f"Ingesting inline comments ({len(comments)} files containing comments)...")
+        comment_success = 0
+        for idx, comment_block in enumerate(comments, start=1):
+            dataset_name = f"code_comments_{idx}"
+            success = await remember_content(comment_block, dataset_name=dataset_name)
+            if success:
+                comment_success += 1
+        typer.echo(f"Successfully remembered {comment_success}/{len(comments)} comment segments.")
+@app.command()
+def remember(
+    directory: str = typer.Option(
+        ".",
+        "--dir", "-d",
+        help="The directory of the codebase to ingest."
+    )
+):
+    """
+    Ingest the codebase files into persistent Cognee memory.
+    """
+    initialize_cognee()
+    run_async(remember_pipeline(directory))
+    typer.echo("[Success] Codebase memory ingestion completed.")
+@app.command()
+def ask(
+    query: str = typer.Argument(..., help="Your natural language question about the codebase.")
+):
+    """
+    Ask a question about the ingested codebase memory in plain English.
+    """
+    initialize_cognee()
+    typer.echo(f"Querying codebase memory for: '{query}'...")
+    answer = run_async(recall_query(query))
+    typer.echo("\n--- Response ---")
+    typer.echo(answer)
+    typer.echo("----------------")
+@app.command()
+def chat():
+    """
+    Start an interactive DevMind terminal chat session to explore your codebase.
+    """
+    initialize_cognee()
+    from rich.console import Console
+    from rich.markdown import Markdown
+    from rich.panel import Panel
+    from rich.prompt import Prompt
+    console = Console()
+    console.print(Panel.fit("[bold blue]DevMind Codebase Chat[/bold blue]\n[dim]Type your queries below. Type 'exit' or 'quit' to close.[/dim]", border_style="blue"))
+    while True:
+        try:
+            query = Prompt.ask("\n[bold green]You[/bold green]")
+            if not query.strip():
+                continue
+            if query.lower().strip() in ['exit', 'quit', 'clear']:
+                console.print("[dim]Goodbye![/dim]")
+                break
+            with console.status("[bold cyan]DevMind is thinking...[/bold cyan]", spinner="dots"):
+                answer = run_async(recall_query(query))
+            console.print("\n[bold magenta]DevMind:[/bold magenta]")
+            console.print(Markdown(answer))
+        except (KeyboardInterrupt, EOFError):
+            console.print("\n[dim]Goodbye![/dim]")
+            break
+        except Exception as e:
+            console.print(f"[bold red]Error:[/bold red] {str(e)}")
+@app.command()
+def log(
+    decision: str = typer.Argument(..., help="The Architectural Decision Record (ADR) text to log.")
+):
+    """
+    Log an Architectural Decision Record (ADR) into persistent memory.
+    """
+    initialize_cognee()
+    typer.echo(f"Logging decision: '{decision}'...")
+    tagged_decision = f"Architectural Decision Record:\n{decision}"
+    import time
+    dataset_name = f"adr_decision_{int(time.time())}"
+    success = run_async(remember_content(tagged_decision, dataset_name=dataset_name))
+    if success:
+        typer.echo("[Success] Architectural decision successfully logged.")
+    else:
+        typer.echo("[Error] Failed to log architectural decision.")
+@app.command()
+def refresh(
+    directory: str = typer.Option(
+        ".",
+        "--dir", "-d",
+        help="The directory of the codebase to refresh."
+    )
+):
+    """
+    Refresh codebase memory by scanning for changed files and refining relationships.
+    """
+    initialize_cognee()
+    typer.echo("Scanning for codebase changes to refresh memory...")
+    run_async(remember_pipeline(directory))
+    typer.echo("Refining the codebase memory graph structure...")
+    # Improve memory on all dataset partitions
+    success = run_async(improve_memory(dataset_name="codebase_memory"))
+    if success:
+        typer.echo("[Success] Memory refresh and relationship refinement completed.")
+    else:
+        typer.echo("[Warning] File changes re-ingested, but relationship refinement had warnings.")
+@app.command()
+def forget(
+    file_path: str = typer.Option(
+        None,
+        "--file", "-f",
+        help="The relative path of the file memory to forget."
+    ),
+    all_memories: bool = typer.Option(
+        False,
+        "--all", "-a",
+        help="Wipe all local memory databases completely."
+    )
+):
+    """
+    Surgically forget a specific file's memory, or completely wipe the local databases.
+    """
+    initialize_cognee()
+    if all_memories:
+        typer.echo("Wiping all local memory databases...")
+        import shutil
+        from devmind.memory import system_path, data_path
+        try:
+            if os.path.exists(system_path):
+                shutil.rmtree(system_path)
+            if os.path.exists(data_path):
+                shutil.rmtree(data_path)
+            typer.echo("[Success] Local memory databases completely wiped.")
+        except Exception as e:
+            typer.echo(f"[Error] Failed to wipe memory folders: {e}")
+        return
+    if file_path:
+        dataset_name = file_path.replace("/", "_").replace("\\", "_").replace(".", "_").replace(" ", "_")
+        typer.echo(f"Removing memory dataset '{dataset_name}'...")
+        success = run_async(forget_memory(dataset_name))
+        if success:
+            typer.echo(f"[Success] Memory of '{file_path}' successfully forgotten.")
+        else:
+            typer.echo(f"[Error] Failed to forget memory of '{file_path}'.")
+    else:
+        typer.echo("[Warning] Please specify either --file <path> to forget a file, or --all to wipe all databases.")
+@app.command()
+def dashboard(
+    port: int = typer.Option(8000, "--port", "-p", help="Port to run the dashboard server on.")
+):
+    """
+    Launch the DevMind Web UI dashboard.
+    """
+    import uvicorn
+    typer.echo(f"Starting DevMind Web UI Dashboard on http://localhost:{port} ...")
+    uvicorn.run("devmind.web.app:app", host="127.0.0.1", port=port, reload=False)
+@app.command()
+def mcp():
+    """
+    Start the DevMind MCP server for integration with Claude Code.
+    """
+    typer.echo("Starting DevMind MCP Server...")
+    # Imported inside the command and aliased, because this function is
+    # itself named ``mcp`` and would otherwise shadow the imported object.
+    from devmind.integrations.claude_code import mcp as mcp_instance
+    mcp_instance.run()
+# Invoke the Typer application when this module is executed as a script.
+if __name__ == "__main__":
+    app()

devmind_cli-0.1.0/devmind/ingestion/comment_extractor.py ADDED Viewed

@@ -0,0 +1,91 @@
+import os
+import re
+import logging
+logger = logging.getLogger("devmind.comment_extractor")
+# Regex to find common developer tags in comments
+TAG_PATTERN = re.compile(r"\b(TODO|FIXME|NOTE|BUG|HACK|WARNING|ADR|DEPRECATED)\b", re.IGNORECASE)
+def extract_comments_from_file(file_path: str) -> list[str]:
+    """
+    Parses a single file, extracting inline comments and docstrings
+    that contain key developer tags (TODO, FIXME, NOTE, HACK, etc.).
+    """
+    extracted = []
+    _, ext = os.path.splitext(file_path.lower())
+    try:
+        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+            content = f.read()
+        lines = content.splitlines()
+        # 1. Python parsing (extract hash comments and triple-quoted docstrings)
+        if ext in (".py", ".sh", ".yaml", ".yml", ".ini"):
+            # Inline comment scanner
+            for idx, line in enumerate(lines, start=1):
+                comment_match = re.search(r"#\s*(.*)", line)
+                if comment_match:
+                    comment_text = comment_match.group(1).strip()
+                    if TAG_PATTERN.search(comment_text):
+                        extracted.append(f"Line {idx}: {comment_text}")
+            # Simple regex search for docstrings
+            docstrings = re.findall(r'"""(.*?)"""', content, re.DOTALL)
+            docstrings.extend(re.findall(r"'''(.*?)'''", content, re.DOTALL))
+            for doc in docstrings:
+                doc_clean = doc.strip()
+                if doc_clean:
+                    extracted.append(f"Docstring: {doc_clean}")
+        # 2. C-Style languages parsing (JS, TS, C, C++, Go, Java, Rust)
+        elif ext in (".js", ".ts", ".jsx", ".tsx", ".c", ".cpp", ".h", ".go", ".java", ".rs", ".css"):
+            # Inline comment scanner (// ...)
+            for idx, line in enumerate(lines, start=1):
+                comment_match = re.search(r"//\s*(.*)", line)
+                if comment_match:
+                    comment_text = comment_match.group(1).strip()
+                    if TAG_PATTERN.search(comment_text):
+                        extracted.append(f"Line {idx}: {comment_text}")
+            # Block comment scanner (/* ... */)
+            block_comments = re.findall(r"/\*(.*?)\*/", content, re.DOTALL)
+            for block in block_comments:
+                for idx, block_line in enumerate(block.splitlines(), start=1):
+                    block_line_clean = block_line.strip().lstrip("*").strip()
+                    if TAG_PATTERN.search(block_line_clean):
+                        extracted.append(f"Block Comment: {block_line_clean}")
+        # 3. HTML/XML/Markdown C-style comments (<!-- ... -->)
+        elif ext in (".html", ".htm", ".xml", ".md"):
+            html_comments = re.findall(r"<!--(.*?)-->", content, re.DOTALL)
+            for block in html_comments:
+                for block_line in block.splitlines():
+                    block_line_clean = block_line.strip()
+                    if TAG_PATTERN.search(block_line_clean):
+                        extracted.append(f"HTML Comment: {block_line_clean}")
+    except Exception as e:
+        logger.error(f"Error parsing comments from {file_path}: {e}")
+    return extracted
+def get_codebase_comments(repo_path: str, source_files: list[str]) -> list[str]:
+    """
+    Loops through all project files and extracts formatted comments/docstrings.
+    Returns a list of structured comment records.
+    """
+    all_comments = []
+    logger.info("Scanning codebase for inline comments and docstrings...")
+    for file_path in source_files:
+        abs_path = os.path.join(repo_path, file_path)
+        file_comments = extract_comments_from_file(abs_path)
+        if file_comments:
+            comment_log = [f"File Path: {file_path}"]
+            comment_log.extend(file_comments)
+            all_comments.append("\n".join(comment_log))
+    return all_comments