codefox 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codefox/__init__.py ADDED
File without changes
codefox/__main__.py ADDED
@@ -0,0 +1,4 @@
1
"""Package entry point: running ``python -m codefox`` starts the CLI."""

from codefox.main import cli

if __name__ == "__main__":
    # Delegate straight to the click-style CLI entry point.
    cli()
File without changes
@@ -0,0 +1,112 @@
1
+ import abc
2
+ import dataclasses
3
+ from typing import Any, Protocol
4
+
5
+ from codefox.utils.helper import Helper
6
+
7
+
8
+ class ExecuteResponse(Protocol):
9
+ text: str
10
+
11
+
12
+ @dataclasses.dataclass
13
+ class Response:
14
+ text: str
15
+
16
+
17
+ class BaseAPI(abc.ABC):
18
+ def __init__(self, config: dict[str, Any] | None = None) -> None:
19
+ super().__init__()
20
+ try:
21
+ self.config: dict[str, Any] = config or Helper.read_yml(
22
+ ".codefox.yml"
23
+ )
24
+ except FileNotFoundError:
25
+ raise RuntimeError(
26
+ "Configuration file '.codefox.yml' not found. "
27
+ "Please run 'codefox --command init' first."
28
+ )
29
+
30
+ if "model" not in self.config or not self.config.get("model"):
31
+ raise ValueError("Missing required key 'model'")
32
+
33
+ self.model_config = self._processing_model_config(self.config["model"])
34
+ self.review_config = self._processing_review_config(
35
+ self.config["review"]
36
+ )
37
+
38
+ @abc.abstractmethod
39
+ def check_model(self, name: str) -> bool:
40
+ pass
41
+
42
+ @abc.abstractmethod
43
+ def execute(self, diff_text: str) -> ExecuteResponse:
44
+ pass
45
+
46
+ @abc.abstractmethod
47
+ def check_connection(self) -> tuple[bool, Any]:
48
+ pass
49
+
50
+ @abc.abstractmethod
51
+ def upload_files(self, path_files: str) -> tuple[bool, Any]:
52
+ pass
53
+
54
+ @abc.abstractmethod
55
+ def remove_files(self) -> None:
56
+ pass
57
+
58
+ def get_tag_models(self) -> list[str]:
59
+ return []
60
+
61
+ def _processing_review_config(
62
+ self, review_config: dict[str, Any]
63
+ ) -> dict[str, Any]:
64
+ if "max_issues" not in review_config:
65
+ review_config["max_issues"] = None
66
+
67
+ if "suggest_fixes" not in review_config:
68
+ review_config["suggest_fixes"] = True
69
+
70
+ if "diff_only" not in review_config:
71
+ review_config["diff_only"] = False
72
+
73
+ return review_config
74
+
75
+ def _processing_model_config(
76
+ self, model_config: dict[str, Any]
77
+ ) -> dict[str, Any]:
78
+ if "name" not in model_config or not model_config.get("name"):
79
+ raise ValueError("Key 'model' missing required value key 'name'")
80
+
81
+ if not model_config["name"].strip():
82
+ raise ValueError("Model name cannot be empty")
83
+
84
+ if "max_tokens" not in model_config or not model_config.get(
85
+ "max_tokens"
86
+ ):
87
+ model_config["max_tokens"] = None
88
+
89
+ if "max_completion_tokens" not in model_config or not model_config.get(
90
+ "max_completion_tokens"
91
+ ):
92
+ model_config["max_completion_tokens"] = None
93
+
94
+ if "temperature" not in model_config or not model_config.get(
95
+ "temperature"
96
+ ):
97
+ model_config["temperature"] = 0.2
98
+
99
+ if model_config["temperature"] > 1 or model_config["temperature"] < 0:
100
+ raise ValueError(
101
+ "Temperature must be between 0 and 1, "
102
+ "got {model_config['temperature']}"
103
+ )
104
+
105
+ timeout = model_config.get("timeout")
106
+ if timeout is None:
107
+ model_config["timeout"] = 600
108
+ timeout = 600
109
+ if not isinstance(timeout, (int, float)) or timeout <= 0:
110
+ raise ValueError(f"Timeout must be positive number, got {timeout}")
111
+
112
+ return model_config
codefox/api/gemini.py ADDED
@@ -0,0 +1,224 @@
1
+ import os
2
+ import time
3
+ from collections.abc import Callable
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from typing import Any
6
+
7
+ from google import genai
8
+ from google.genai import types
9
+ from rich import print
10
+ from rich.progress import (
11
+ BarColumn,
12
+ Progress,
13
+ SpinnerColumn,
14
+ TextColumn,
15
+ TimeElapsedColumn,
16
+ )
17
+
18
+ from codefox.api.base_api import BaseAPI, ExecuteResponse, Response
19
+ from codefox.prompts.prompt_template import PromptTemplate
20
+ from codefox.utils.helper import Helper
21
+
22
+
23
class Gemini(BaseAPI):
    """Gemini provider: uploads the codebase into a File Search store and
    reviews diffs via ``generate_content`` with that store as a tool."""

    default_model_name = "gemini-2.0-flash"
    MAX_WORKERS = 10

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        super().__init__(config)
        self.store: types.FileSearchStore | None = None
        self.client = genai.Client(api_key=os.getenv("CODEFOX_API_KEY"))

    def check_model(self, name: str) -> bool:
        """Return True if *name* is a generateContent-capable model."""
        return name in self.get_tag_models()

    def check_connection(self) -> tuple[bool, Any]:
        """Probe the API by listing models; return ``(ok, error)``."""
        try:
            self.client.models.list()
            return True, None
        except Exception as e:
            return False, e

    def get_tag_models(self) -> list[str]:
        """List model names (without the 'models/' prefix) that support
        the generateContent action."""
        response = self.client.models.list()
        page = response.page or []
        return [
            (model.name or "").replace("models/", "")
            for model in page
            if (
                model.supported_actions
                and "generateContent" in model.supported_actions
            )
        ]

    def upload_files(
        self, path_files: str
    ) -> tuple[bool, str | types.FileSearchStore | None]:
        """Create a File Search store and upload the codebase into it.

        Skipped entirely in diff-only mode. Returns ``(ok, detail)`` where
        *detail* is an error message on failure.
        """
        if self.review_config["diff_only"]:
            self.store = None
            return True, None

        ignored_paths = Helper.read_codefoxignore()

        try:
            store = self.client.file_search_stores.create(
                config={"display_name": "CodeFox File Store"}
            )
        except Exception as e:
            return False, f"Error creating file search store: {e}"

        # BUG FIX: keep a handle on the store as soon as it exists so
        # remove_files() can delete it even when we bail out early below
        # (no files to upload, or processing timeout) — previously those
        # paths leaked the remote store.
        self.store = store

        valid_files = [
            f
            for f in Helper.get_all_files(path_files)
            if not any(ignored in f for ignored in ignored_paths)
        ]

        operations = self._upload_thread_pool_files(store, valid_files)
        if not operations:
            return True, None

        print(
            "[yellow]Waiting for Gemini API "
            "to process uploaded files...[/yellow]"
        )
        total = len(operations)

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("{task.completed}/{task.total}"),
            TimeElapsedColumn(),
        ) as progress:
            task = progress.add_task("Processing files...", total=total)

            timeout = self.model_config["timeout"]
            start_time = time.time()
            pending_ops = {op.name: op for op in operations}
            while pending_ops:
                if time.time() - start_time > timeout:
                    return False, "Gemini file processing timed out."

                # Re-poll each pending operation; drop it once done.
                for name in list(pending_ops.keys()):
                    op = self.client.operations.get(pending_ops[name])
                    if op.done:
                        if op.error:
                            print(
                                f"File processing failed: {op.error.message}"
                            )
                        pending_ops.pop(name)

                done_count = len(operations) - len(pending_ops)
                progress.update(task, completed=done_count)

                if not pending_ops:
                    break
                time.sleep(2)

        return True, None

    def remove_files(self):
        """Delete the File Search store created by upload_files(), if any."""
        if self.store is not None:
            try:
                self.client.file_search_stores.delete(
                    name=self.store.name,
                    config=types.DeleteFileSearchStoreConfig(force=True),
                )
                print(
                    "Successfully removed "
                    f"file search store: {self.store.name}"
                )
            except Exception as e:
                print(
                    f"Error removing file search store {self.store.name}: {e}"
                )
        else:
            print("No file search store to remove")

    def execute(self, diff_text: str) -> ExecuteResponse:
        """Review *diff_text*, attaching the file-search store as a tool
        when one was uploaded."""
        system_prompt = PromptTemplate(self.config)
        # BUG FIX: the original concatenation was missing the space between
        # "diff" and "and", producing "...git diffand identify...".
        content = (
            "Analyze the following git diff "
            f"and identify potential risks:\n\n{diff_text}"
        )

        tools: list[types.Tool | Callable[..., Any] | Any] = []
        if self.store is not None and self.store.name is not None:
            tools.append(
                types.Tool(
                    file_search=types.FileSearch(
                        file_search_store_names=[self.store.name]
                    )
                )
            )

        response = self.client.models.generate_content(
            model=self.model_config["name"],
            contents=content,
            config=types.GenerateContentConfig(
                system_instruction=system_prompt.get(),
                temperature=self.model_config["temperature"],
                max_output_tokens=self.model_config["max_tokens"],
                tools=tools,
            ),
        )
        return Response(text=response.text or "")

    def _upload_thread_pool_files(
        self, store: types.FileSearchStore, valid_files: list | None = None
    ) -> list:
        """Upload many files to the Gemini store concurrently.

        Returns the list of long-running upload operations; failed uploads
        are reported and skipped.
        """
        valid_files = valid_files or []
        if not valid_files:
            return []

        operations = []
        with Progress() as progress:
            task = progress.add_task(
                "[bold cyan]Uploading codebase...[/]", total=len(valid_files)
            )

            with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
                futures = {
                    executor.submit(
                        self._upload_single_file, file, store
                    ): file
                    for file in valid_files
                }

                for future in as_completed(futures):
                    upload_op, error = future.result()

                    if error:
                        failed_file, exc = error
                        print(
                            f"[red]Error uploading {failed_file}: {exc}[/red]"
                        )
                    else:
                        operations.append(upload_op)

                    progress.advance(task)

        return operations

    def _upload_single_file(
        self, file_path: str, store: types.FileSearchStore
    ) -> tuple:
        """Upload a single file to the Gemini store.

        Returns ``(operation, None)`` on success, ``(None, (path, exc))``
        on failure so the caller can report without raising.
        """
        try:
            file_stores = self.client.file_search_stores

            upload_op = file_stores.upload_to_file_search_store(
                file_search_store_name=store.name or "",
                file=file_path,
                config={"mime_type": "text/plain"},
            )
            return upload_op, None
        except Exception as e:
            return None, (file_path, e)
@@ -0,0 +1,31 @@
1
+ import enum
2
+ from typing import cast
3
+
4
+ from codefox.api.base_api import BaseAPI
5
+ from codefox.api.gemini import Gemini
6
+ from codefox.api.ollama import Ollama
7
+ from codefox.api.openrouter import OpenRouter
8
+
9
+
10
class ModelEnum(enum.Enum):
    """Registry mapping provider names to their BaseAPI implementations."""

    GEMINI = Gemini
    OPENROUTER = OpenRouter
    OLLAMA = Ollama

    @property
    def api_class(self) -> type[BaseAPI]:
        """The concrete ``BaseAPI`` subclass backing this provider."""
        return cast(type[BaseAPI], self.value)

    @classmethod
    def by_name(cls, name: str) -> "ModelEnum":
        """Look up a provider case-insensitively.

        Raises:
            ValueError: listing the valid provider names on a miss.
        """
        key = name.upper()
        if key not in cls.__members__:
            available = cls.names()
            raise ValueError(
                f"Unknown provider '{name}'. Available: {available}"
            ) from None
        return cls[key]

    @classmethod
    def names(cls) -> list[str]:
        """All provider names, lowercased."""
        return [member.name.lower() for member in cls]
codefox/api/ollama.py ADDED
@@ -0,0 +1,138 @@
1
+ import os
2
+ from typing import Any
3
+
4
+ import requests
5
+ from ollama import ChatResponse, Client
6
+
7
+ from codefox.api.base_api import BaseAPI, ExecuteResponse, Response
8
+ from codefox.prompts.prompt_template import PromptTemplate
9
+ from codefox.utils.local_rag import LocalRAG
10
+
11
+
12
class Ollama(BaseAPI):
    """Ollama provider: chats with a local/remote Ollama server and augments
    diff reviews with a local RAG index."""

    default_model_name = "gemma3:12b"
    default_embedding = "BAAI/bge-small-en-v1.5"
    base_url = "https://ollama.com"

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        super().__init__(config)

        if self.model_config.get("base_url"):
            self.base_url = self.model_config.get("base_url")

        if not self.model_config.get("embedding"):
            self.model_config["embedding"] = self.default_embedding

        api_key = os.getenv("CODEFOX_API_KEY")

        # "null" is treated as "no key" so a templated env var does not
        # produce a bogus Authorization header.
        headers = None
        if api_key and api_key != "null":
            headers = {
                "Authorization": f"Bearer {api_key}",
            }

        self.rag = None

        self.client = Client(
            host=self.base_url,
            headers=headers,
            timeout=self.model_config.get("timeout", 600),
        )

    def check_model(self, name: str) -> bool:
        """Return True if *name* is served by this Ollama instance."""
        return name in self.get_tag_models()

    def check_connection(self) -> tuple[bool, Any]:
        """Probe the server by showing the default model; return (ok, error)."""
        try:
            self.client.show(self.default_model_name)
            return True, None
        except Exception as e:
            return False, e

    def upload_files(self, path_files: str) -> tuple[bool, Any]:
        """Build the local RAG index over *path_files* (skipped in
        diff-only mode)."""
        if self.review_config["diff_only"]:
            return True, None

        self.rag = LocalRAG(self.model_config["embedding"], path_files)
        self.rag.build()

        return True, None

    def remove_files(self):
        """Nothing to clean up: the RAG index is purely local."""
        pass

    def execute(self, diff_text: str) -> ExecuteResponse:
        """Audit *diff_text* with the chat model, adding local-RAG context
        when an index was built."""
        system_prompt = PromptTemplate(self.config)

        rag_context = ""
        if self.rag:
            hits = self.rag.search(diff_text, k=5)
            rag_context = "\n\n".join(hits)

        content = f"""
        You are performing a DIFF AUDIT.

        Your task:
        Detect BEHAVIOR CHANGE caused by the modified lines.

        DO NOT:
        - explain the codebase
        - describe architecture
        - summarize classes

        If you do not compare OLD vs NEW behavior -> the answer is INVALID.

        ──────── DIFF ────────
        GIT DIFF WITH +/- MARKERS. ONLY THESE LINES CHANGED.
        {diff_text}

        ──────── RELEVANT CONTEXT ────────
        (USE ONLY IF NEEDED TO TRACE DATA FLOW)
        Do NOT analyze this section by itself.
        Use it only to understand symbols referenced in the diff.

        {rag_context}

        ──────── REQUIRED REASONING ────────

        1. List the changed lines
        2. For each change:
           OLD behavior ->
           NEW behavior ->
        3. What execution path now behaves differently?
        4. What can break?

        If there is no behavioral change -> explicitly say:
        NO BEHAVIORAL CHANGE.
        """

        # Only forward options the user actually configured.
        options = {}
        if self.model_config.get("temperature") is not None:
            options["temperature"] = self.model_config["temperature"]
        if self.model_config.get("max_tokens") is not None:
            options["num_predict"] = self.model_config["max_tokens"]

        chat_response: ChatResponse = self.client.chat(
            model=self.model_config["name"],
            messages=[
                {"role": "system", "content": system_prompt.get()},
                {"role": "user", "content": content},
            ],
            options=options if options else None,
        )

        response = Response(chat_response.message.content or "")
        return response

    def get_tag_models(self) -> list[str]:
        """List model tags via the /api/tags endpoint; [] on any failure.

        BUG FIX: the original request had no timeout (could hang forever on
        a dead server) and no error handling (raised on connection errors or
        a missing 'models' key).
        """
        try:
            response = requests.get(
                f"{self.base_url}/api/tags",
                timeout=self.model_config.get("timeout", 600),
            )
        except requests.RequestException:
            return []

        if response.status_code != 200:
            return []

        data = response.json()
        return [
            model["name"]
            for model in data.get("models", [])
            if model.get("name")
        ]
@@ -0,0 +1,175 @@
1
+ import math
2
+ import os
3
+ from typing import Any
4
+
5
+ from openai import OpenAI
6
+ from rich.progress import track
7
+
8
+ from codefox.api.base_api import BaseAPI, ExecuteResponse, Response
9
+ from codefox.prompts.prompt_template import PromptTemplate
10
+ from codefox.utils.helper import Helper
11
+
12
+
13
class OpenRouter(BaseAPI):
    """OpenRouter provider: reviews diffs through the OpenAI-compatible API
    and retrieves context via an in-memory embedding index."""

    default_model_name = "qwen/qwen3-vl-30b-a3b-thinking"
    default_embedding = "text-embedding-3-small"
    base_url = "https://openrouter.ai/api/v1"

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        super().__init__(config)

        # BUG FIX: the original condition was
        # `"base_url" in self.model_config or self.model_config.get(...)`,
        # which overwrote base_url with None/"" whenever the key existed
        # but was empty. Only override on a truthy configured value.
        if self.model_config.get("base_url"):
            self.base_url = self.model_config["base_url"]

        if not self.model_config.get("embedding"):
            self.model_config["embedding"] = self.default_embedding

        self.files: list[dict[str, Any]] | None = None
        self.index: list[dict[str, Any]] = []
        self.client = OpenAI(
            api_key=os.getenv("CODEFOX_API_KEY"), base_url=self.base_url
        )

    def check_connection(self) -> tuple[bool, Any]:
        """Probe the API by listing models; return ``(ok, error)``."""
        try:
            self.client.models.list()
            return True, None
        except Exception as e:
            return False, e

    def check_model(self, name: str) -> bool:
        """Return True if *name* is listed by the provider."""
        return name in self.get_tag_models()

    def execute(self, diff_text: str = "") -> ExecuteResponse:
        """Review *diff_text*, attaching the top-k RAG chunks as context."""
        system_prompt = PromptTemplate(self.config)
        # BUG FIX: the original concatenation was missing the space between
        # "diff" and "and", producing "...git diffand identify...".
        content = (
            "Analyze the following git diff "
            f"and identify potential risks:\n\n{diff_text}"
        )

        rag_chunks = self._search(diff_text, k=8)

        files_context = "\n\n".join(
            f"<file path='{c['path']}'>\n{c['text']}\n</file>"
            for c in rag_chunks
        )

        completion = self.client.chat.completions.create(
            model=self.model_config["name"],
            temperature=self.model_config["temperature"],
            timeout=self.model_config.get("timeout", 600),
            max_tokens=self.model_config["max_tokens"],
            max_completion_tokens=self.model_config["max_completion_tokens"],
            messages=[
                {"role": "system", "content": system_prompt.get()},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": content},
                        {"type": "text", "text": files_context},
                    ],
                },
            ],
        )

        raw = completion.choices[0].message.content
        return Response(text=raw if raw is not None else "")

    def remove_files(self) -> None:
        """Nothing to clean up: the embedding index is purely in-memory."""
        pass

    def upload_files(self, path_files: str) -> tuple[bool, Any]:
        """Read the codebase, chunk it, and build the embedding index.

        Skipped in diff-only mode. Unreadable files are silently skipped.
        Returns ``(ok, error)``.
        """
        if self.review_config["diff_only"]:
            return True, None

        ignored_paths = Helper.read_codefoxignore()

        valid_files = [
            f
            for f in Helper.get_all_files(path_files)
            if not any(ignored in f for ignored in ignored_paths)
        ]

        files: list[dict[str, Any]] = []
        for file in track(valid_files, description="Progress read files..."):
            try:
                with open(file, encoding="utf-8", errors="ignore") as f:
                    content = f.read()

                files.append({"path": file, "content": content})
            except Exception:
                continue

        try:
            self.index = []
            for file_entry in track(
                files, description="Progress files processing..."
            ):
                chunks = self._chunk_text(file_entry["content"])

                if not chunks:
                    continue

                embeddings = self._embed(chunks)

                for chunk, emb in zip(chunks, embeddings):
                    self.index.append(
                        {
                            "path": file_entry["path"],
                            "text": chunk,
                            "embedding": emb,
                        }
                    )

            self.files = files
            return True, None
        except Exception as e:
            return False, e

    def get_tag_models(self) -> list:
        """List model ids available through the API."""
        models = self.client.models.list()
        return [model.id for model in models]

    def _chunk_text(self, text: str, size: int = 800) -> list[str]:
        """Split *text* into fixed-size chunks, dropping whitespace-only ones."""
        raw_chunks = [text[i : i + size] for i in range(0, len(text), size)]
        return [c for c in raw_chunks if c.strip()]

    def _embed(self, texts: list[str]) -> list[list[float]]:
        """Embed the non-blank *texts*; [] when there is nothing to embed
        or the API rejects the input."""
        clean_texts = [t for t in texts if t and t.strip()]

        if not clean_texts:
            return []

        try:
            resp = self.client.embeddings.create(
                model=self.model_config["embedding"],
                input=clean_texts,
            )
        except ValueError:
            return []

        if not resp.data:
            return []

        return [d.embedding for d in resp.data]

    def _cosine(self, a, b):
        """Cosine similarity with a small epsilon against zero vectors."""
        dot = sum(x * y for x, y in zip(a, b))
        na = math.sqrt(sum(x * x for x in a))
        nb = math.sqrt(sum(x * x for x in b))
        return dot / (na * nb + 1e-8)

    def _search(self, query: str, k: int = 5) -> list[dict]:
        """Return the *k* index entries most similar to *query*.

        BUG FIX: the original indexed ``self._embed([query])[0]``
        unconditionally, raising IndexError when embedding failed or the
        query was blank; now those cases (and an empty index) return [].
        """
        embeddings = self._embed([query])
        if not embeddings or not self.index:
            return []
        query_emb = embeddings[0]

        scored = [
            (self._cosine(query_emb, item["embedding"]), item)
            for item in self.index
        ]

        scored.sort(key=lambda x: x[0], reverse=True)
        return [item for _, item in scored[:k]]
codefox/base_cli.py ADDED
@@ -0,0 +1,7 @@
1
+ import abc
2
+
3
+
4
class BaseCLI(abc.ABC):
    """Contract for CLI command handlers: each must provide ``execute``."""

    @abc.abstractmethod
    def execute(self) -> None:
        """Run the command; implemented by concrete handlers."""