codefox 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codefox/__init__.py ADDED
File without changes
codefox/__main__.py ADDED
@@ -0,0 +1,4 @@
1
"""Package entry point: running ``python -m codefox`` starts the CLI."""

from codefox.main import cli

if __name__ == "__main__":
    # Delegate straight to the click-style CLI entry point.
    cli()
File without changes
@@ -0,0 +1,112 @@
1
+ import abc
2
+ import dataclasses
3
+ from typing import Any, Protocol
4
+
5
+ from codefox.utils.helper import Helper
6
+
7
+
8
+ class ExecuteResponse(Protocol):
9
+ text: str
10
+
11
+
12
+ @dataclasses.dataclass
13
+ class Response:
14
+ text: str
15
+
16
+
17
+ class BaseAPI(abc.ABC):
18
+ def __init__(self, config: dict[str, Any] | None = None) -> None:
19
+ super().__init__()
20
+ try:
21
+ self.config: dict[str, Any] = config or Helper.read_yml(
22
+ ".codefox.yml"
23
+ )
24
+ except FileNotFoundError:
25
+ raise RuntimeError(
26
+ "Configuration file '.codefox.yml' not found. "
27
+ "Please run 'codefox --command init' first."
28
+ )
29
+
30
+ if "model" not in self.config or not self.config.get("model"):
31
+ raise ValueError("Missing required key 'model'")
32
+
33
+ self.model_config = self._processing_model_config(self.config["model"])
34
+ self.review_config = self._processing_review_config(
35
+ self.config["review"]
36
+ )
37
+
38
+ @abc.abstractmethod
39
+ def check_model(self, name: str) -> bool:
40
+ pass
41
+
42
+ @abc.abstractmethod
43
+ def execute(self, diff_text: str) -> ExecuteResponse:
44
+ pass
45
+
46
+ @abc.abstractmethod
47
+ def check_connection(self) -> tuple[bool, Any]:
48
+ pass
49
+
50
+ @abc.abstractmethod
51
+ def upload_files(self, path_files: str) -> tuple[bool, Any]:
52
+ pass
53
+
54
+ @abc.abstractmethod
55
+ def remove_files(self) -> None:
56
+ pass
57
+
58
+ def get_tag_models(self) -> list[str]:
59
+ return []
60
+
61
+ def _processing_review_config(
62
+ self, review_config: dict[str, Any]
63
+ ) -> dict[str, Any]:
64
+ if "max_issues" not in review_config:
65
+ review_config["max_issues"] = None
66
+
67
+ if "suggest_fixes" not in review_config:
68
+ review_config["suggest_fixes"] = True
69
+
70
+ if "diff_only" not in review_config:
71
+ review_config["diff_only"] = False
72
+
73
+ return review_config
74
+
75
+ def _processing_model_config(
76
+ self, model_config: dict[str, Any]
77
+ ) -> dict[str, Any]:
78
+ if "name" not in model_config or not model_config.get("name"):
79
+ raise ValueError("Key 'model' missing required value key 'name'")
80
+
81
+ if not model_config["name"].strip():
82
+ raise ValueError("Model name cannot be empty")
83
+
84
+ if "max_tokens" not in model_config or not model_config.get(
85
+ "max_tokens"
86
+ ):
87
+ model_config["max_tokens"] = None
88
+
89
+ if "max_completion_tokens" not in model_config or not model_config.get(
90
+ "max_completion_tokens"
91
+ ):
92
+ model_config["max_completion_tokens"] = None
93
+
94
+ if "temperature" not in model_config or not model_config.get(
95
+ "temperature"
96
+ ):
97
+ model_config["temperature"] = 0.2
98
+
99
+ if model_config["temperature"] > 1 or model_config["temperature"] < 0:
100
+ raise ValueError(
101
+ "Temperature must be between 0 and 1, "
102
+ "got {model_config['temperature']}"
103
+ )
104
+
105
+ timeout = model_config.get("timeout")
106
+ if timeout is None:
107
+ model_config["timeout"] = 600
108
+ timeout = 600
109
+ if not isinstance(timeout, (int, float)) or timeout <= 0:
110
+ raise ValueError(f"Timeout must be positive number, got {timeout}")
111
+
112
+ return model_config
codefox/api/gemini.py ADDED
@@ -0,0 +1,224 @@
1
+ import os
2
+ import time
3
+ from collections.abc import Callable
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from typing import Any
6
+
7
+ from google import genai
8
+ from google.genai import types
9
+ from rich import print
10
+ from rich.progress import (
11
+ BarColumn,
12
+ Progress,
13
+ SpinnerColumn,
14
+ TextColumn,
15
+ TimeElapsedColumn,
16
+ )
17
+
18
+ from codefox.api.base_api import BaseAPI, ExecuteResponse, Response
19
+ from codefox.prompts.prompt_template import PromptTemplate
20
+ from codefox.utils.helper import Helper
21
+
22
+
23
class Gemini(BaseAPI):
    """Gemini provider: uploads the codebase into a File Search store and
    reviews diffs via ``generate_content`` with that store as a tool."""

    default_model_name = "gemini-2.0-flash"
    MAX_WORKERS = 10

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        super().__init__(config)
        self.store: types.FileSearchStore | None = None
        self.client = genai.Client(api_key=os.getenv("CODEFOX_API_KEY"))

    def check_model(self, name: str) -> bool:
        """Return True if *name* is a generateContent-capable model."""
        return name in self.get_tag_models()

    def check_connection(self) -> tuple[bool, Any]:
        """Probe the API by listing models; return ``(ok, error)``."""
        try:
            self.client.models.list()
            return True, None
        except Exception as e:
            return False, e

    def get_tag_models(self) -> list[str]:
        """List model names (without the 'models/' prefix) that support
        the generateContent action."""
        response = self.client.models.list()
        page = response.page or []
        return [
            (model.name or "").replace("models/", "")
            for model in page
            if (
                model.supported_actions
                and "generateContent" in model.supported_actions
            )
        ]

    def upload_files(
        self, path_files: str
    ) -> tuple[bool, str | types.FileSearchStore | None]:
        """Create a File Search store and upload the codebase into it.

        Skipped entirely in diff-only mode. Returns ``(ok, detail)`` where
        *detail* is an error message on failure.
        """
        if self.review_config["diff_only"]:
            self.store = None
            return True, None

        ignored_paths = Helper.read_codefoxignore()

        try:
            store = self.client.file_search_stores.create(
                config={"display_name": "CodeFox File Store"}
            )
        except Exception as e:
            return False, f"Error creating file search store: {e}"

        # BUG FIX: keep a handle on the store as soon as it exists so
        # remove_files() can delete it even when we bail out early below
        # (no files to upload, or processing timeout) — previously those
        # paths leaked the remote store.
        self.store = store

        valid_files = [
            f
            for f in Helper.get_all_files(path_files)
            if not any(ignored in f for ignored in ignored_paths)
        ]

        operations = self._upload_thread_pool_files(store, valid_files)
        if not operations:
            return True, None

        print(
            "[yellow]Waiting for Gemini API "
            "to process uploaded files...[/yellow]"
        )
        total = len(operations)

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("{task.completed}/{task.total}"),
            TimeElapsedColumn(),
        ) as progress:
            task = progress.add_task("Processing files...", total=total)

            timeout = self.model_config["timeout"]
            start_time = time.time()
            pending_ops = {op.name: op for op in operations}
            while pending_ops:
                if time.time() - start_time > timeout:
                    return False, "Gemini file processing timed out."

                # Re-poll each pending operation; drop it once done.
                for name in list(pending_ops.keys()):
                    op = self.client.operations.get(pending_ops[name])
                    if op.done:
                        if op.error:
                            print(
                                f"File processing failed: {op.error.message}"
                            )
                        pending_ops.pop(name)

                done_count = len(operations) - len(pending_ops)
                progress.update(task, completed=done_count)

                if not pending_ops:
                    break
                time.sleep(2)

        return True, None

    def remove_files(self):
        """Delete the File Search store created by upload_files(), if any."""
        if self.store is not None:
            try:
                self.client.file_search_stores.delete(
                    name=self.store.name,
                    config=types.DeleteFileSearchStoreConfig(force=True),
                )
                print(
                    "Successfully removed "
                    f"file search store: {self.store.name}"
                )
            except Exception as e:
                print(
                    f"Error removing file search store {self.store.name}: {e}"
                )
        else:
            print("No file search store to remove")

    def execute(self, diff_text: str) -> ExecuteResponse:
        """Review *diff_text*, attaching the file-search store as a tool
        when one was uploaded."""
        system_prompt = PromptTemplate(self.config)
        # BUG FIX: the original concatenation was missing the space between
        # "diff" and "and", producing "...git diffand identify...".
        content = (
            "Analyze the following git diff "
            f"and identify potential risks:\n\n{diff_text}"
        )

        tools: list[types.Tool | Callable[..., Any] | Any] = []
        if self.store is not None and self.store.name is not None:
            tools.append(
                types.Tool(
                    file_search=types.FileSearch(
                        file_search_store_names=[self.store.name]
                    )
                )
            )

        response = self.client.models.generate_content(
            model=self.model_config["name"],
            contents=content,
            config=types.GenerateContentConfig(
                system_instruction=system_prompt.get(),
                temperature=self.model_config["temperature"],
                max_output_tokens=self.model_config["max_tokens"],
                tools=tools,
            ),
        )
        return Response(text=response.text or "")

    def _upload_thread_pool_files(
        self, store: types.FileSearchStore, valid_files: list | None = None
    ) -> list:
        """Upload many files to the Gemini store concurrently.

        Returns the list of long-running upload operations; failed uploads
        are reported and skipped.
        """
        valid_files = valid_files or []
        if not valid_files:
            return []

        operations = []
        with Progress() as progress:
            task = progress.add_task(
                "[bold cyan]Uploading codebase...[/]", total=len(valid_files)
            )

            with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
                futures = {
                    executor.submit(
                        self._upload_single_file, file, store
                    ): file
                    for file in valid_files
                }

                for future in as_completed(futures):
                    upload_op, error = future.result()

                    if error:
                        failed_file, exc = error
                        print(
                            f"[red]Error uploading {failed_file}: {exc}[/red]"
                        )
                    else:
                        operations.append(upload_op)

                    progress.advance(task)

        return operations

    def _upload_single_file(
        self, file_path: str, store: types.FileSearchStore
    ) -> tuple:
        """Upload a single file to the Gemini store.

        Returns ``(operation, None)`` on success, ``(None, (path, exc))``
        on failure so the caller can report without raising.
        """
        try:
            file_stores = self.client.file_search_stores

            upload_op = file_stores.upload_to_file_search_store(
                file_search_store_name=store.name or "",
                file=file_path,
                config={"mime_type": "text/plain"},
            )
            return upload_op, None
        except Exception as e:
            return None, (file_path, e)
@@ -0,0 +1,31 @@
1
+ import enum
2
+ from typing import cast
3
+
4
+ from codefox.api.base_api import BaseAPI
5
+ from codefox.api.gemini import Gemini
6
+ from codefox.api.ollama import Ollama
7
+ from codefox.api.openrouter import OpenRouter
8
+
9
+
10
class ModelEnum(enum.Enum):
    """Registry mapping provider names to their BaseAPI implementations."""

    GEMINI = Gemini
    OPENROUTER = OpenRouter
    OLLAMA = Ollama

    @property
    def api_class(self) -> type[BaseAPI]:
        """The concrete ``BaseAPI`` subclass backing this provider."""
        return cast(type[BaseAPI], self.value)

    @classmethod
    def by_name(cls, name: str) -> "ModelEnum":
        """Look up a provider case-insensitively.

        Raises:
            ValueError: listing the valid provider names on a miss.
        """
        key = name.upper()
        if key not in cls.__members__:
            available = cls.names()
            raise ValueError(
                f"Unknown provider '{name}'. Available: {available}"
            ) from None
        return cls[key]

    @classmethod
    def names(cls) -> list[str]:
        """All provider names, lowercased."""
        return [member.name.lower() for member in cls]
codefox/api/ollama.py ADDED
@@ -0,0 +1,138 @@
1
+ import os
2
+ from typing import Any
3
+
4
+ import requests
5
+ from ollama import ChatResponse, Client
6
+
7
+ from codefox.api.base_api import BaseAPI, ExecuteResponse, Response
8
+ from codefox.prompts.prompt_template import PromptTemplate
9
+ from codefox.utils.local_rag import LocalRAG
10
+
11
+
12
class Ollama(BaseAPI):
    """Ollama provider: chats with a local/remote Ollama server and augments
    diff reviews with a local RAG index."""

    default_model_name = "gemma3:12b"
    default_embedding = "BAAI/bge-small-en-v1.5"
    base_url = "https://ollama.com"

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        super().__init__(config)

        if self.model_config.get("base_url"):
            self.base_url = self.model_config.get("base_url")

        if not self.model_config.get("embedding"):
            self.model_config["embedding"] = self.default_embedding

        api_key = os.getenv("CODEFOX_API_KEY")

        # "null" is treated as "no key" so a templated env var does not
        # produce a bogus Authorization header.
        headers = None
        if api_key and api_key != "null":
            headers = {
                "Authorization": f"Bearer {api_key}",
            }

        self.rag = None

        self.client = Client(
            host=self.base_url,
            headers=headers,
            timeout=self.model_config.get("timeout", 600),
        )

    def check_model(self, name: str) -> bool:
        """Return True if *name* is served by this Ollama instance."""
        return name in self.get_tag_models()

    def check_connection(self) -> tuple[bool, Any]:
        """Probe the server by showing the default model; return (ok, error)."""
        try:
            self.client.show(self.default_model_name)
            return True, None
        except Exception as e:
            return False, e

    def upload_files(self, path_files: str) -> tuple[bool, Any]:
        """Build the local RAG index over *path_files* (skipped in
        diff-only mode)."""
        if self.review_config["diff_only"]:
            return True, None

        self.rag = LocalRAG(self.model_config["embedding"], path_files)
        self.rag.build()

        return True, None

    def remove_files(self):
        """Nothing to clean up: the RAG index is purely local."""
        pass

    def execute(self, diff_text: str) -> ExecuteResponse:
        """Audit *diff_text* with the chat model, adding local-RAG context
        when an index was built."""
        system_prompt = PromptTemplate(self.config)

        rag_context = ""
        if self.rag:
            hits = self.rag.search(diff_text, k=5)
            rag_context = "\n\n".join(hits)

        content = f"""
        You are performing a DIFF AUDIT.

        Your task:
        Detect BEHAVIOR CHANGE caused by the modified lines.

        DO NOT:
        - explain the codebase
        - describe architecture
        - summarize classes

        If you do not compare OLD vs NEW behavior -> the answer is INVALID.

        ──────── DIFF ────────
        GIT DIFF WITH +/- MARKERS. ONLY THESE LINES CHANGED.
        {diff_text}

        ──────── RELEVANT CONTEXT ────────
        (USE ONLY IF NEEDED TO TRACE DATA FLOW)
        Do NOT analyze this section by itself.
        Use it only to understand symbols referenced in the diff.

        {rag_context}

        ──────── REQUIRED REASONING ────────

        1. List the changed lines
        2. For each change:
           OLD behavior ->
           NEW behavior ->
        3. What execution path now behaves differently?
        4. What can break?

        If there is no behavioral change -> explicitly say:
        NO BEHAVIORAL CHANGE.
        """

        # Only forward options the user actually configured.
        options = {}
        if self.model_config.get("temperature") is not None:
            options["temperature"] = self.model_config["temperature"]
        if self.model_config.get("max_tokens") is not None:
            options["num_predict"] = self.model_config["max_tokens"]

        chat_response: ChatResponse = self.client.chat(
            model=self.model_config["name"],
            messages=[
                {"role": "system", "content": system_prompt.get()},
                {"role": "user", "content": content},
            ],
            options=options if options else None,
        )

        response = Response(chat_response.message.content or "")
        return response

    def get_tag_models(self) -> list[str]:
        """List model tags via the /api/tags endpoint; [] on any failure.

        BUG FIX: the original request had no timeout (could hang forever on
        a dead server) and no error handling (raised on connection errors or
        a missing 'models' key).
        """
        try:
            response = requests.get(
                f"{self.base_url}/api/tags",
                timeout=self.model_config.get("timeout", 600),
            )
        except requests.RequestException:
            return []

        if response.status_code != 200:
            return []

        data = response.json()
        return [
            model["name"]
            for model in data.get("models", [])
            if model.get("name")
        ]
@@ -0,0 +1,175 @@
1
+ import math
2
+ import os
3
+ from typing import Any
4
+
5
+ from openai import OpenAI
6
+ from rich.progress import track
7
+
8
+ from codefox.api.base_api import BaseAPI, ExecuteResponse, Response
9
+ from codefox.prompts.prompt_template import PromptTemplate
10
+ from codefox.utils.helper import Helper
11
+
12
+
13
class OpenRouter(BaseAPI):
    """OpenRouter provider: reviews diffs through the OpenAI-compatible API
    and retrieves context via an in-memory embedding index."""

    default_model_name = "qwen/qwen3-vl-30b-a3b-thinking"
    default_embedding = "text-embedding-3-small"
    base_url = "https://openrouter.ai/api/v1"

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        super().__init__(config)

        # BUG FIX: the original condition was
        # `"base_url" in self.model_config or self.model_config.get(...)`,
        # which overwrote base_url with None/"" whenever the key existed
        # but was empty. Only override on a truthy configured value.
        if self.model_config.get("base_url"):
            self.base_url = self.model_config["base_url"]

        if not self.model_config.get("embedding"):
            self.model_config["embedding"] = self.default_embedding

        self.files: list[dict[str, Any]] | None = None
        self.index: list[dict[str, Any]] = []
        self.client = OpenAI(
            api_key=os.getenv("CODEFOX_API_KEY"), base_url=self.base_url
        )

    def check_connection(self) -> tuple[bool, Any]:
        """Probe the API by listing models; return ``(ok, error)``."""
        try:
            self.client.models.list()
            return True, None
        except Exception as e:
            return False, e

    def check_model(self, name: str) -> bool:
        """Return True if *name* is listed by the provider."""
        return name in self.get_tag_models()

    def execute(self, diff_text: str = "") -> ExecuteResponse:
        """Review *diff_text*, attaching the top-k RAG chunks as context."""
        system_prompt = PromptTemplate(self.config)
        # BUG FIX: the original concatenation was missing the space between
        # "diff" and "and", producing "...git diffand identify...".
        content = (
            "Analyze the following git diff "
            f"and identify potential risks:\n\n{diff_text}"
        )

        rag_chunks = self._search(diff_text, k=8)

        files_context = "\n\n".join(
            f"<file path='{c['path']}'>\n{c['text']}\n</file>"
            for c in rag_chunks
        )

        completion = self.client.chat.completions.create(
            model=self.model_config["name"],
            temperature=self.model_config["temperature"],
            timeout=self.model_config.get("timeout", 600),
            max_tokens=self.model_config["max_tokens"],
            max_completion_tokens=self.model_config["max_completion_tokens"],
            messages=[
                {"role": "system", "content": system_prompt.get()},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": content},
                        {"type": "text", "text": files_context},
                    ],
                },
            ],
        )

        raw = completion.choices[0].message.content
        return Response(text=raw if raw is not None else "")

    def remove_files(self) -> None:
        """Nothing to clean up: the embedding index is purely in-memory."""
        pass

    def upload_files(self, path_files: str) -> tuple[bool, Any]:
        """Read the codebase, chunk it, and build the embedding index.

        Skipped in diff-only mode. Unreadable files are silently skipped.
        Returns ``(ok, error)``.
        """
        if self.review_config["diff_only"]:
            return True, None

        ignored_paths = Helper.read_codefoxignore()

        valid_files = [
            f
            for f in Helper.get_all_files(path_files)
            if not any(ignored in f for ignored in ignored_paths)
        ]

        files: list[dict[str, Any]] = []
        for file in track(valid_files, description="Progress read files..."):
            try:
                with open(file, encoding="utf-8", errors="ignore") as f:
                    content = f.read()

                files.append({"path": file, "content": content})
            except Exception:
                continue

        try:
            self.index = []
            for file_entry in track(
                files, description="Progress files processing..."
            ):
                chunks = self._chunk_text(file_entry["content"])

                if not chunks:
                    continue

                embeddings = self._embed(chunks)

                for chunk, emb in zip(chunks, embeddings):
                    self.index.append(
                        {
                            "path": file_entry["path"],
                            "text": chunk,
                            "embedding": emb,
                        }
                    )

            self.files = files
            return True, None
        except Exception as e:
            return False, e

    def get_tag_models(self) -> list:
        """List model ids available through the API."""
        models = self.client.models.list()
        return [model.id for model in models]

    def _chunk_text(self, text: str, size: int = 800) -> list[str]:
        """Split *text* into fixed-size chunks, dropping whitespace-only ones."""
        raw_chunks = [text[i : i + size] for i in range(0, len(text), size)]
        return [c for c in raw_chunks if c.strip()]

    def _embed(self, texts: list[str]) -> list[list[float]]:
        """Embed the non-blank *texts*; [] when there is nothing to embed
        or the API rejects the input."""
        clean_texts = [t for t in texts if t and t.strip()]

        if not clean_texts:
            return []

        try:
            resp = self.client.embeddings.create(
                model=self.model_config["embedding"],
                input=clean_texts,
            )
        except ValueError:
            return []

        if not resp.data:
            return []

        return [d.embedding for d in resp.data]

    def _cosine(self, a, b):
        """Cosine similarity with a small epsilon against zero vectors."""
        dot = sum(x * y for x, y in zip(a, b))
        na = math.sqrt(sum(x * x for x in a))
        nb = math.sqrt(sum(x * x for x in b))
        return dot / (na * nb + 1e-8)

    def _search(self, query: str, k: int = 5) -> list[dict]:
        """Return the *k* index entries most similar to *query*.

        BUG FIX: the original indexed ``self._embed([query])[0]``
        unconditionally, raising IndexError when embedding failed or the
        query was blank; now those cases (and an empty index) return [].
        """
        embeddings = self._embed([query])
        if not embeddings or not self.index:
            return []
        query_emb = embeddings[0]

        scored = [
            (self._cosine(query_emb, item["embedding"]), item)
            for item in self.index
        ]

        scored.sort(key=lambda x: x[0], reverse=True)
        return [item for _, item in scored[:k]]
codefox/base_cli.py ADDED
@@ -0,0 +1,7 @@
1
+ import abc
2
+
3
+
4
class BaseCLI(abc.ABC):
    """Contract for CLI command handlers: each must provide ``execute``."""

    @abc.abstractmethod
    def execute(self) -> None:
        """Run the command; implemented by concrete handlers."""