git-llm-tool 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- git_llm_tool/__init__.py +5 -0
- git_llm_tool/__main__.py +6 -0
- git_llm_tool/cli.py +167 -0
- git_llm_tool/commands/__init__.py +1 -0
- git_llm_tool/commands/changelog_cmd.py +189 -0
- git_llm_tool/commands/commit_cmd.py +134 -0
- git_llm_tool/core/__init__.py +1 -0
- git_llm_tool/core/config.py +352 -0
- git_llm_tool/core/diff_optimizer.py +206 -0
- git_llm_tool/core/exceptions.py +26 -0
- git_llm_tool/core/git_helper.py +250 -0
- git_llm_tool/core/jira_helper.py +238 -0
- git_llm_tool/core/rate_limiter.py +136 -0
- git_llm_tool/core/smart_chunker.py +262 -0
- git_llm_tool/core/token_counter.py +169 -0
- git_llm_tool/providers/__init__.py +21 -0
- git_llm_tool/providers/anthropic_langchain.py +42 -0
- git_llm_tool/providers/azure_openai_langchain.py +59 -0
- git_llm_tool/providers/base.py +203 -0
- git_llm_tool/providers/factory.py +85 -0
- git_llm_tool/providers/gemini_langchain.py +57 -0
- git_llm_tool/providers/langchain_base.py +608 -0
- git_llm_tool/providers/ollama_langchain.py +45 -0
- git_llm_tool/providers/openai_langchain.py +42 -0
- git_llm_tool-0.1.12.dist-info/LICENSE +21 -0
- git_llm_tool-0.1.12.dist-info/METADATA +645 -0
- git_llm_tool-0.1.12.dist-info/RECORD +29 -0
- git_llm_tool-0.1.12.dist-info/WHEEL +4 -0
- git_llm_tool-0.1.12.dist-info/entry_points.txt +3 -0
git_llm_tool/core/config.py
@@ -0,0 +1,352 @@
"""Configuration management for git-llm-tool."""

import os
import yaml
from pathlib import Path
from typing import Dict, Any, Optional
from dataclasses import dataclass, field

from git_llm_tool.core.exceptions import ConfigError


@dataclass
class LlmConfig:
    """LLM configuration settings."""
    default_model: str = "gpt-4o"
    language: str = "en"
    api_keys: Dict[str, str] = field(default_factory=dict)
    azure_openai: Dict[str, str] = field(default_factory=dict)  # endpoint, api_version, deployment_name

    # Processing configuration
    chunking_threshold: int = 12000  # Token threshold to trigger chunking + parallel processing

    # Ollama configuration for hybrid processing
    use_ollama_for_chunks: bool = False  # Use Ollama for chunk processing (map phase)
    ollama_model: str = "llama3:8b"  # Ollama model for chunk processing
    ollama_base_url: str = "http://localhost:11434"  # Ollama API base URL

    # Parallel processing configuration
    max_parallel_chunks: int = 4  # Maximum concurrent chunks for remote APIs
    ollama_max_parallel_chunks: int = 16  # Maximum concurrent chunks for Ollama (local)

    # Internal constants (not user-configurable)
    _chunk_size: int = 6000  # Maximum chunk size in characters
    _chunk_overlap: int = 300  # Overlap between chunks to maintain context
    _max_parallel_chunks: int = 4  # Maximum number of chunks to process in parallel (remote APIs)
    _ollama_max_parallel_chunks: int = 16  # Maximum number of chunks to process in parallel (Ollama local)
    _chunk_processing_timeout: float = 120.0  # Timeout for each chunk processing (seconds)
    _max_retries: int = 5  # Maximum number of retries
    _initial_delay: float = 1.0  # Initial retry delay in seconds
    _max_delay: float = 60.0  # Maximum retry delay in seconds
    _backoff_multiplier: float = 2.0  # Exponential backoff multiplier
    _rate_limit_delay: float = 0.5  # Minimum delay between requests
    _max_context_lines: int = 3  # Maximum context lines to keep
    _max_tokens: int = 8000  # Maximum tokens before truncation


@dataclass
class JiraConfig:
    """Jira integration configuration."""
    enabled: bool = False
    ticket_pattern: Optional[str] = None  # Jira ticket regex pattern


@dataclass
class EditorConfig:
    """Editor configuration settings."""
    preferred_editor: Optional[str] = None  # e.g., "vi", "nano", "code", etc.


@dataclass
class AppConfig:
    """Main application configuration."""
    llm: LlmConfig = field(default_factory=LlmConfig)
    jira: JiraConfig = field(default_factory=JiraConfig)
    editor: EditorConfig = field(default_factory=EditorConfig)
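
Because these are plain dataclasses, the configuration can also be built directly in tests or scripts without touching any files. A minimal sketch follows; the override values are invented for illustration, and the listing resumes with ConfigLoader below.

from git_llm_tool.core.config import AppConfig, LlmConfig

# All defaults: gpt-4o, English, no API keys, no Azure settings.
cfg = AppConfig()
assert cfg.llm.default_model == "gpt-4o"
assert cfg.jira.enabled is False

# Override selected LLM fields; the model name and key are placeholders.
custom = AppConfig(llm=LlmConfig(default_model="gpt-4o-mini",
                                 api_keys={"openai": "<your-key>"}))
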
class ConfigLoader:
    """Singleton configuration loader with hierarchical configuration support."""

    _instance = None
    _config = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if not getattr(self, '_initialized', False):
            self._config = self._load_config()
            self._initialized = True

    @property
    def config(self) -> AppConfig:
        """Get the loaded configuration."""
        return self._config

    def _load_config(self) -> AppConfig:
        """Load configuration from multiple sources in hierarchical order."""
        config_data = {}

        # 1. Load global config
        global_config_path = Path.home() / ".git-llm-tool" / "config.yaml"
        if global_config_path.exists():
            config_data.update(self._load_yaml_file(global_config_path))

        # 2. Load project config (overrides global)
        project_config_path = Path(".git-llm-tool.yaml")
        if project_config_path.exists():
            project_config = self._load_yaml_file(project_config_path)
            config_data = self._merge_configs(config_data, project_config)

        # 3. Load environment variables (override file configs)
        env_config = self._load_env_config()
        config_data = self._merge_configs(config_data, env_config)

        return self._create_app_config(config_data)

    def _load_yaml_file(self, file_path: Path) -> Dict[str, Any]:
        """Load YAML configuration file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
                return data if data is not None else {}
        except yaml.YAMLError as e:
            raise ConfigError(f"Invalid YAML in {file_path}: {e}")
        except Exception as e:
            raise ConfigError(f"Failed to read config file {file_path}: {e}")

    def _load_env_config(self) -> Dict[str, Any]:
        """Load configuration from environment variables."""
        config = {}

        # API keys from environment
        api_keys = {}
        if openai_key := os.getenv("OPENAI_API_KEY"):
            api_keys["openai"] = openai_key
        if anthropic_key := os.getenv("ANTHROPIC_API_KEY"):
            api_keys["anthropic"] = anthropic_key
        if google_key := os.getenv("GOOGLE_API_KEY"):
            api_keys["google"] = google_key

        # Azure OpenAI configuration from environment
        azure_openai = {}
        if azure_endpoint := os.getenv("AZURE_OPENAI_ENDPOINT"):
            azure_openai["endpoint"] = azure_endpoint
        if azure_key := os.getenv("AZURE_OPENAI_API_KEY"):
            api_keys["azure_openai"] = azure_key
        if azure_version := os.getenv("AZURE_OPENAI_API_VERSION"):
            azure_openai["api_version"] = azure_version
        if azure_deployment := os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"):
            azure_openai["deployment_name"] = azure_deployment

        # Set up LLM config
        if api_keys or azure_openai:
            config["llm"] = {}
            if api_keys:
                config["llm"]["api_keys"] = api_keys
            if azure_openai:
                config["llm"]["azure_openai"] = azure_openai

        # Other environment variables
        if model := os.getenv("GIT_LLM_MODEL"):
            config.setdefault("llm", {})["default_model"] = model

        if language := os.getenv("GIT_LLM_LANGUAGE"):
            config.setdefault("llm", {})["language"] = language

        return config

    def _merge_configs(self, base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
        """Merge two configuration dictionaries recursively."""
        result = base.copy()

        for key, value in override.items():
            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                result[key] = self._merge_configs(result[key], value)
            else:
                result[key] = value

        return result
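
To make the merge semantics concrete, here is a small sketch that calls the private helper directly, for demonstration only; the YAML snippets are invented stand-ins for a global and a project file. The listing resumes with _create_app_config below.

import yaml
from git_llm_tool.core.config import ConfigLoader

global_cfg = yaml.safe_load("""
llm:
  default_model: gpt-4o
  language: en
""")
project_cfg = yaml.safe_load("""
llm:
  default_model: gpt-4o-mini
""")

merged = ConfigLoader()._merge_configs(global_cfg, project_cfg)
# Nested dicts merge recursively; scalar values are replaced by the override:
# {'llm': {'default_model': 'gpt-4o-mini', 'language': 'en'}}
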
    def _create_app_config(self, config_data: Dict[str, Any]) -> AppConfig:
        """Create AppConfig instance from configuration data."""
        # Create LLM config
        llm_data = config_data.get("llm", {})
        llm_config = LlmConfig(
            default_model=llm_data.get("default_model", "gpt-4o"),
            language=llm_data.get("language", "en"),
            api_keys=llm_data.get("api_keys", {}),
            azure_openai=llm_data.get("azure_openai", {}),
            # Processing settings
            chunking_threshold=llm_data.get("chunking_threshold", 12000),
            # Ollama settings
            use_ollama_for_chunks=llm_data.get("use_ollama_for_chunks", False),
            ollama_model=llm_data.get("ollama_model", "llama3:8b"),
            ollama_base_url=llm_data.get("ollama_base_url", "http://localhost:11434")
        )

        # Create Jira config
        jira_data = config_data.get("jira", {})
        jira_config = JiraConfig(
            enabled=jira_data.get("enabled", False),
            ticket_pattern=jira_data.get("ticket_pattern")
        )

        # Create Editor config
        editor_data = config_data.get("editor", {})
        editor_config = EditorConfig(
            preferred_editor=editor_data.get("preferred_editor")
        )

        return AppConfig(llm=llm_config, jira=jira_config, editor=editor_config)

    def save_config(self, config_path: Optional[Path] = None) -> None:
        """Save current configuration to file."""
        if config_path is None:
            # Save to global config by default
            config_path = Path.home() / ".git-llm-tool" / "config.yaml"

        # Ensure directory exists
        config_path.parent.mkdir(parents=True, exist_ok=True)

        # Convert config to dict
        config_dict = {
            "llm": {
                "default_model": self._config.llm.default_model,
                "language": self._config.llm.language,
                "api_keys": self._config.llm.api_keys,
                "azure_openai": self._config.llm.azure_openai,
                "chunking_threshold": self._config.llm.chunking_threshold,
                "use_ollama_for_chunks": self._config.llm.use_ollama_for_chunks,
                "ollama_model": self._config.llm.ollama_model,
                "ollama_base_url": self._config.llm.ollama_base_url
            },
            "jira": {
                "enabled": self._config.jira.enabled,
                "ticket_pattern": self._config.jira.ticket_pattern
            },
            "editor": {
                "preferred_editor": self._config.editor.preferred_editor
            }
        }

        # Remove empty sections to keep config clean
        if not config_dict["llm"]["api_keys"]:
            del config_dict["llm"]["api_keys"]
        if not config_dict["llm"]["azure_openai"]:
            del config_dict["llm"]["azure_openai"]

        # Remove None values from jira config
        if config_dict["jira"]["ticket_pattern"] is None:
            del config_dict["jira"]["ticket_pattern"]

        # Remove None values from editor config
        if config_dict["editor"]["preferred_editor"] is None:
            del config_dict["editor"]["preferred_editor"]
        if not config_dict["editor"]:
            del config_dict["editor"]

        try:
            with open(config_path, 'w', encoding='utf-8') as f:
                yaml.dump(config_dict, f, default_flow_style=False, indent=2)
        except Exception as e:
            raise ConfigError(f"Failed to save config to {config_path}: {e}")

    def set_value(self, key_path: str, value: str) -> None:
        """Set a configuration value using dot notation (e.g., 'llm.default_model')."""
        keys = key_path.split('.')

        if len(keys) < 2:
            raise ConfigError(f"Invalid key path: {key_path}")

        # Handle llm.default_model
        if keys[0] == "llm" and keys[1] == "default_model":
            self._config.llm.default_model = value
        # Handle llm.language
        elif keys[0] == "llm" and keys[1] == "language":
            self._config.llm.language = value
        # Handle llm.use_ollama_for_chunks
        elif keys[0] == "llm" and keys[1] == "use_ollama_for_chunks":
            self._config.llm.use_ollama_for_chunks = value.lower() in ("true", "1", "yes", "on")
        # Handle llm.ollama_model
        elif keys[0] == "llm" and keys[1] == "ollama_model":
            self._config.llm.ollama_model = value
        # Handle llm.ollama_base_url
        elif keys[0] == "llm" and keys[1] == "ollama_base_url":
            self._config.llm.ollama_base_url = value
        # Handle llm.api_keys.*
        elif keys[0] == "llm" and keys[1] == "api_keys" and len(keys) == 3:
            self._config.llm.api_keys[keys[2]] = value
        # Handle llm.azure_openai.*
        elif keys[0] == "llm" and keys[1] == "azure_openai" and len(keys) == 3:
            self._config.llm.azure_openai[keys[2]] = value
        # Handle jira.enabled
        elif keys[0] == "jira" and keys[1] == "enabled":
            self._config.jira.enabled = value.lower() in ("true", "1", "yes", "on")
        # Handle jira.ticket_pattern
        elif keys[0] == "jira" and keys[1] == "ticket_pattern":
            self._config.jira.ticket_pattern = value
        # Handle editor.preferred_editor
        elif keys[0] == "editor" and keys[1] == "preferred_editor":
            self._config.editor.preferred_editor = value
        else:
            raise ConfigError(f"Unknown configuration key: {key_path}")
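
As a usage sketch, the dot-notation setter pairs with save_config to persist changes; the values below are placeholders. The listing resumes with get_value below.

from git_llm_tool.core.config import ConfigLoader

loader = ConfigLoader()
loader.set_value("llm.default_model", "gpt-4o-mini")
loader.set_value("jira.enabled", "true")            # String parsed as a boolean
loader.set_value("llm.api_keys.openai", "<key>")    # Third segment picks the dict entry
loader.save_config()  # Writes to ~/.git-llm-tool/config.yaml by default
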
    def get_value(self, key_path: str) -> Any:
        """Get a configuration value using dot notation."""
        keys = key_path.split('.')

        if len(keys) < 2:
            raise ConfigError(f"Invalid key path: {key_path}")

        # Handle llm.default_model
        if keys[0] == "llm" and keys[1] == "default_model":
            return self._config.llm.default_model
        # Handle llm.language
        elif keys[0] == "llm" and keys[1] == "language":
            return self._config.llm.language
        # Handle llm.use_ollama_for_chunks
        elif keys[0] == "llm" and keys[1] == "use_ollama_for_chunks":
            return self._config.llm.use_ollama_for_chunks
        # Handle llm.ollama_model
        elif keys[0] == "llm" and keys[1] == "ollama_model":
            return self._config.llm.ollama_model
        # Handle llm.ollama_base_url
        elif keys[0] == "llm" and keys[1] == "ollama_base_url":
            return self._config.llm.ollama_base_url
        # Handle llm.api_keys.*
        elif keys[0] == "llm" and keys[1] == "api_keys" and len(keys) == 3:
            return self._config.llm.api_keys.get(keys[2])
        # Handle llm.azure_openai.*
        elif keys[0] == "llm" and keys[1] == "azure_openai" and len(keys) == 3:
            return self._config.llm.azure_openai.get(keys[2])
        # Handle jira.enabled
        elif keys[0] == "jira" and keys[1] == "enabled":
            return self._config.jira.enabled
        # Handle jira.ticket_pattern
        elif keys[0] == "jira" and keys[1] == "ticket_pattern":
            return self._config.jira.ticket_pattern
        # Handle editor.preferred_editor
        elif keys[0] == "editor" and keys[1] == "preferred_editor":
            return self._config.editor.preferred_editor
        else:
            raise ConfigError(f"Unknown configuration key: {key_path}")

    def reload(self) -> None:
        """Reload configuration from files."""
        self._config = self._load_config()

    @classmethod
    def _reset_instance(cls) -> None:
        """Reset singleton instance for testing."""
        cls._instance = None
        cls._config = None


def get_config() -> AppConfig:
    """Get the application configuration."""
    return ConfigLoader().config
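
Putting the pieces together, consumers call get_config(); because the loader is a singleton, environment overrides must be in place before the first load (or the instance reset via the test hook above). A minimal sketch with an example value:

import os
from git_llm_tool.core.config import ConfigLoader, get_config

os.environ["GIT_LLM_MODEL"] = "gpt-4o-mini"  # Example override

ConfigLoader._reset_instance()  # Force a fresh load; normally only needed in tests
config = get_config()
print(config.llm.default_model)  # -> gpt-4o-mini, taken from the environment
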
git_llm_tool/core/diff_optimizer.py
@@ -0,0 +1,206 @@
"""Diff optimization strategies to reduce token usage."""

import re
from typing import List, Tuple, Optional
from dataclasses import dataclass


@dataclass
class DiffStats:
    """Statistics about diff optimization."""
    original_size: int
    optimized_size: int
    compression_ratio: float
    files_processed: int
    lines_removed: int

class DiffOptimizer:
    """Optimize git diffs to reduce token usage while preserving meaning."""

    def __init__(self, max_context_lines: int = 3):
        self.max_context_lines = max_context_lines

    def optimize_diff(self, diff: str, aggressive: bool = False) -> Tuple[str, DiffStats]:
        """Optimize diff to reduce token usage."""
        original_size = len(diff)
        lines = diff.split('\n')

        optimized_lines = []
        files_processed = 0
        lines_removed = 0

        i = 0
        while i < len(lines):
            line = lines[i]

            # Keep file headers
            if line.startswith('diff --git'):
                files_processed += 1
                optimized_lines.append(line)
                i += 1
                continue

            # Keep index and mode lines (dropped in aggressive mode)
            if line.startswith('index ') or line.startswith('new file mode') or line.startswith('deleted file mode'):
                if not aggressive:
                    optimized_lines.append(line)
                else:
                    lines_removed += 1
                i += 1
                continue

            # Keep file paths but simplify
            if line.startswith('--- ') or line.startswith('+++ '):
                if aggressive:
                    # Simplify path names
                    simplified = re.sub(r'^(---|\+\+\+) [ab]/', r'\1 ', line)
                    optimized_lines.append(simplified)
                else:
                    optimized_lines.append(line)
                i += 1
                continue

            # Process hunk headers
            if line.startswith('@@'):
                optimized_lines.append(line)
                i += 1

                # Process the content of this hunk; advance by the number of
                # input lines consumed, which can exceed the number of
                # optimized lines returned once context is compressed
                hunk_lines, removed_count, consumed = self._process_hunk(lines[i:], aggressive)
                optimized_lines.extend(hunk_lines)
                lines_removed += removed_count
                i += consumed
                continue

            # Keep other lines as-is
            optimized_lines.append(line)
            i += 1

        optimized_diff = '\n'.join(optimized_lines)
        optimized_size = len(optimized_diff)

        stats = DiffStats(
            original_size=original_size,
            optimized_size=optimized_size,
            compression_ratio=optimized_size / original_size if original_size > 0 else 1.0,
            files_processed=files_processed,
            lines_removed=lines_removed
        )

        return optimized_diff, stats

    def _process_hunk(self, lines: List[str], aggressive: bool) -> Tuple[List[str], int, int]:
        """Process a hunk to optimize context and changes.

        Returns the optimized lines, the number of lines removed, and the
        number of input lines consumed.
        """
        result = []
        removed_count = 0
        consumed = 0

        # Group consecutive context lines
        context_buffer = []

        for line in lines:
            # Stop at next hunk or file
            if line.startswith('@@') or line.startswith('diff --git'):
                break
            consumed += 1

            if line.startswith(' '):  # Context line
                context_buffer.append(line)
            else:  # Added or removed line
                # Flush context buffer with limit
                if context_buffer:
                    if aggressive and len(context_buffer) > self.max_context_lines * 2:
                        # Keep first and last few context lines
                        keep_start = context_buffer[:self.max_context_lines]
                        keep_end = context_buffer[-self.max_context_lines:]
                        result.extend(keep_start)
                        result.append(f' ... ({len(context_buffer) - len(keep_start) - len(keep_end)} context lines omitted)')
                        result.extend(keep_end)
                        removed_count += len(context_buffer) - len(keep_start) - len(keep_end)
                    else:
                        result.extend(context_buffer)
                    context_buffer = []

                # Keep change lines (but compress whitespace-only changes)
                if aggressive and self._is_whitespace_only_change(line):
                    result.append(f'{line[0]} (whitespace change)')
                    removed_count += 1
                else:
                    result.append(line)

        # Handle remaining context
        if context_buffer:
            if aggressive and len(context_buffer) > self.max_context_lines:
                result.extend(context_buffer[:self.max_context_lines])
                result.append(f' ... ({len(context_buffer) - self.max_context_lines} trailing context lines omitted)')
                removed_count += len(context_buffer) - self.max_context_lines
            else:
                result.extend(context_buffer)

        return result, removed_count, consumed

    def _is_whitespace_only_change(self, line: str) -> bool:
        """Check if a line represents only whitespace changes."""
        if len(line) < 2:
            return False

        content = line[1:]  # Remove +/- prefix
        return content.strip() == '' or re.match(r'^\s+$', content)

    def smart_truncate(self, diff: str, max_tokens: int = 8000) -> str:
        """Smart truncation that preserves important parts of the diff."""
        # Rough estimation: 1 token ≈ 4 characters
        max_chars = max_tokens * 4

        if len(diff) <= max_chars:
            return diff

        lines = diff.split('\n')
        important_lines = []
        regular_lines = []

        for line in lines:
            if self._is_important_line(line):
                important_lines.append(line)
            else:
                regular_lines.append(line)

        # Always keep important lines
        result = important_lines[:]
        current_size = sum(len(line) + 1 for line in result)  # +1 for newline

        # Add regular lines until we hit the limit
        for line in regular_lines:
            if current_size + len(line) + 1 > max_chars:
                result.append('... (diff truncated to fit token limit)')
                break
            result.append(line)
            current_size += len(line) + 1

        return '\n'.join(result)

    def _is_important_line(self, line: str) -> bool:
        """Determine if a line is important and should be preserved."""
        return (
            line.startswith('diff --git') or
            line.startswith('+++') or
            line.startswith('---') or
            line.startswith('@@') or
            (line.startswith('+') and not self._is_whitespace_only_change(line)) or
            (line.startswith('-') and not self._is_whitespace_only_change(line))
        )

    def get_summary_stats(self, diff: str) -> dict:
        """Get summary statistics about a diff."""
        lines = diff.split('\n')

        stats = {
            'total_lines': len(lines),
            'files_changed': len([l for l in lines if l.startswith('diff --git')]),
            'lines_added': len([l for l in lines if l.startswith('+') and not l.startswith('+++')]),
            'lines_removed': len([l for l in lines if l.startswith('-') and not l.startswith('---')]),
            'context_lines': len([l for l in lines if l.startswith(' ')]),
            'estimated_tokens': len(diff) // 4  # Rough estimation
        }

        return stats
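
As a usage sketch, feeding a small diff through the optimizer; the diff text is a fabricated example.

from git_llm_tool.core.diff_optimizer import DiffOptimizer

diff = """diff --git a/app.py b/app.py
--- a/app.py
+++ b/app.py
@@ -1,2 +1,2 @@
 import os
-print("hello")
+print("hello, world")"""

optimizer = DiffOptimizer(max_context_lines=3)
optimized, stats = optimizer.optimize_diff(diff, aggressive=True)
print(stats.files_processed)              # 1
print(round(stats.compression_ratio, 2))  # Slightly below 1.0: a/ b/ prefixes stripped
print(optimizer.get_summary_stats(diff))  # Line counts and a rough token estimate
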
git_llm_tool/core/exceptions.py
@@ -0,0 +1,26 @@
"""Custom exceptions for git-llm-tool."""


class GitLlmError(Exception):
    """Base exception for git-llm-tool."""
    pass


class ConfigError(GitLlmError):
    """Configuration-related errors."""
    pass


class GitError(GitLlmError):
    """Git operation errors."""
    pass


class ApiError(GitLlmError):
    """LLM API-related errors."""
    pass


class JiraError(GitLlmError):
    """Jira integration errors."""
    pass
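
Since every exception derives from GitLlmError, callers can handle a specific failure first and fall back to the common base; a minimal sketch with an invented error message:

from git_llm_tool.core.exceptions import ConfigError, GitLlmError

try:
    raise ConfigError("Invalid YAML in config.yaml: bad indentation")
except ConfigError as e:
    print(f"Configuration problem: {e}")  # Most specific handler wins
except GitLlmError as e:
    print(f"git-llm-tool error: {e}")     # Catch-all for the tool's other errors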