git-llm-tool 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,352 @@
+"""Configuration management for git-llm-tool."""
+
+import os
+import yaml
+from pathlib import Path
+from typing import Dict, Any, Optional
+from dataclasses import dataclass, field
+
+from git_llm_tool.core.exceptions import ConfigError
+
+
+@dataclass
+class LlmConfig:
+    """LLM configuration settings."""
+    default_model: str = "gpt-4o"
+    language: str = "en"
+    api_keys: Dict[str, str] = field(default_factory=dict)
+    azure_openai: Dict[str, str] = field(default_factory=dict)  # endpoint, api_version, deployment_name
+
+    # Processing configuration
+    chunking_threshold: int = 12000  # Token threshold to trigger chunking + parallel processing
+
+    # Ollama configuration for hybrid processing
+    use_ollama_for_chunks: bool = False  # Use Ollama for chunk processing (map phase)
+    ollama_model: str = "llama3:8b"  # Ollama model for chunk processing
+    ollama_base_url: str = "http://localhost:11434"  # Ollama API base URL
+
+    # Parallel processing configuration
+    max_parallel_chunks: int = 4  # Maximum concurrent chunks for remote APIs
+    ollama_max_parallel_chunks: int = 16  # Maximum concurrent chunks for Ollama (local)
+
+    # Internal constants (not user-configurable)
+    _chunk_size: int = 6000  # Maximum chunk size in characters
+    _chunk_overlap: int = 300  # Overlap between chunks to maintain context
+    _max_parallel_chunks: int = 4  # Maximum number of chunks to process in parallel (remote APIs)
+    _ollama_max_parallel_chunks: int = 16  # Maximum number of chunks to process in parallel (Ollama local)
+    _chunk_processing_timeout: float = 120.0  # Timeout for each chunk processing (seconds)
+    _max_retries: int = 5  # Maximum number of retries
+    _initial_delay: float = 1.0  # Initial retry delay in seconds
+    _max_delay: float = 60.0  # Maximum retry delay in seconds
+    _backoff_multiplier: float = 2.0  # Exponential backoff multiplier
+    _rate_limit_delay: float = 0.5  # Minimum delay between requests
+    _max_context_lines: int = 3  # Maximum context lines to keep
+    _max_tokens: int = 8000  # Maximum tokens before truncation
+
+
+@dataclass
+class JiraConfig:
+    """Jira integration configuration."""
+    enabled: bool = False
+    ticket_pattern: Optional[str] = None  # Jira ticket regex pattern
+
+
+@dataclass
+class EditorConfig:
+    """Editor configuration settings."""
+    preferred_editor: Optional[str] = None  # e.g., "vi", "nano", "code", etc.
+
+
+@dataclass
+class AppConfig:
+    """Main application configuration."""
+    llm: LlmConfig = field(default_factory=LlmConfig)
+    jira: JiraConfig = field(default_factory=JiraConfig)
+    editor: EditorConfig = field(default_factory=EditorConfig)
+
+
+class ConfigLoader:
+    """Singleton configuration loader with hierarchical configuration support."""
+
+    _instance = None
+    _config = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        if not getattr(self, '_initialized', False):
+            self._config = self._load_config()
+            self._initialized = True
+
+    @property
+    def config(self) -> AppConfig:
+        """Get the loaded configuration."""
+        return self._config
+
+    def _load_config(self) -> AppConfig:
+        """Load configuration from multiple sources in hierarchical order."""
+        config_data = {}
+
+        # 1. Load global config
+        global_config_path = Path.home() / ".git-llm-tool" / "config.yaml"
+        if global_config_path.exists():
+            config_data.update(self._load_yaml_file(global_config_path))
+
+        # 2. Load project config (override global)
+        project_config_path = Path(".git-llm-tool.yaml")
+        if project_config_path.exists():
+            project_config = self._load_yaml_file(project_config_path)
+            config_data = self._merge_configs(config_data, project_config)
+
+        # 3. Load environment variables (override file configs)
+        env_config = self._load_env_config()
+        config_data = self._merge_configs(config_data, env_config)
+
+        return self._create_app_config(config_data)
+
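For orientation, a minimal global config file (~/.git-llm-tool/config.yaml) that this loader would accept might look like the sketch below. The keys mirror the fields read by _create_app_config further down; the values are defaults or illustrative choices, not anything shipped in the package.

    llm:
      default_model: gpt-4o
      language: en
      chunking_threshold: 12000
      use_ollama_for_chunks: false
      ollama_model: "llama3:8b"
      ollama_base_url: "http://localhost:11434"
    jira:
      enabled: false
    editor:
      preferred_editor: vi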
+    def _load_yaml_file(self, file_path: Path) -> Dict[str, Any]:
+        """Load YAML configuration file."""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = yaml.safe_load(f)
+                return data if data is not None else {}
+        except yaml.YAMLError as e:
+            raise ConfigError(f"Invalid YAML in {file_path}: {e}")
+        except Exception as e:
+            raise ConfigError(f"Failed to read config file {file_path}: {e}")
+
+    def _load_env_config(self) -> Dict[str, Any]:
+        """Load configuration from environment variables."""
+        config = {}
+
+        # API keys from environment
+        api_keys = {}
+        if openai_key := os.getenv("OPENAI_API_KEY"):
+            api_keys["openai"] = openai_key
+        if anthropic_key := os.getenv("ANTHROPIC_API_KEY"):
+            api_keys["anthropic"] = anthropic_key
+        if google_key := os.getenv("GOOGLE_API_KEY"):
+            api_keys["google"] = google_key
+
+        # Azure OpenAI configuration from environment
+        azure_openai = {}
+        if azure_endpoint := os.getenv("AZURE_OPENAI_ENDPOINT"):
+            azure_openai["endpoint"] = azure_endpoint
+        if azure_key := os.getenv("AZURE_OPENAI_API_KEY"):
+            api_keys["azure_openai"] = azure_key
+        if azure_version := os.getenv("AZURE_OPENAI_API_VERSION"):
+            azure_openai["api_version"] = azure_version
+        if azure_deployment := os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"):
+            azure_openai["deployment_name"] = azure_deployment
+
+        # Set up LLM config
+        if api_keys or azure_openai:
+            config["llm"] = {}
+            if api_keys:
+                config["llm"]["api_keys"] = api_keys
+            if azure_openai:
+                config["llm"]["azure_openai"] = azure_openai
+
+        # Other environment variables
+        if model := os.getenv("GIT_LLM_MODEL"):
+            config.setdefault("llm", {})["default_model"] = model
+
+        if language := os.getenv("GIT_LLM_LANGUAGE"):
+            config.setdefault("llm", {})["language"] = language
+
+        return config
+
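For illustration, the environment override path looks like this. The values are placeholders, the variable names are exactly the ones read above, and they are merged after both config files, so they win:

    import os

    # Picked up by _load_env_config() and merged last, overriding the
    # global and project config files.
    os.environ["OPENAI_API_KEY"] = "sk-..."   # becomes llm.api_keys.openai
    os.environ["GIT_LLM_MODEL"] = "gpt-4o"    # becomes llm.default_model
    os.environ["GIT_LLM_LANGUAGE"] = "en"     # becomes llm.language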
+    def _merge_configs(self, base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
+        """Merge two configuration dictionaries recursively."""
+        result = base.copy()
+
+        for key, value in override.items():
+            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+                result[key] = self._merge_configs(result[key], value)
+            else:
+                result[key] = value
+
+        return result
+
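The merge is recursive for nested dictionaries and last-writer-wins for scalar values. A small worked example:

    base = {"llm": {"default_model": "gpt-4o", "language": "en"}}
    override = {"llm": {"language": "ja"}}
    # _merge_configs(base, override) keeps default_model and replaces language:
    # {"llm": {"default_model": "gpt-4o", "language": "ja"}}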
+    def _create_app_config(self, config_data: Dict[str, Any]) -> AppConfig:
+        """Create AppConfig instance from configuration data."""
+        # Create LLM config
+        llm_data = config_data.get("llm", {})
+        llm_config = LlmConfig(
+            default_model=llm_data.get("default_model", "gpt-4o"),
+            language=llm_data.get("language", "en"),
+            api_keys=llm_data.get("api_keys", {}),
+            azure_openai=llm_data.get("azure_openai", {}),
+            # Processing settings
+            chunking_threshold=llm_data.get("chunking_threshold", 12000),
+            # Ollama settings
+            use_ollama_for_chunks=llm_data.get("use_ollama_for_chunks", False),
+            ollama_model=llm_data.get("ollama_model", "llama3:8b"),
+            ollama_base_url=llm_data.get("ollama_base_url", "http://localhost:11434")
+        )
+
+        # Create Jira config
+        jira_data = config_data.get("jira", {})
+        jira_config = JiraConfig(
+            enabled=jira_data.get("enabled", False),
+            ticket_pattern=jira_data.get("ticket_pattern")
+        )
+
+        # Create Editor config
+        editor_data = config_data.get("editor", {})
+        editor_config = EditorConfig(
+            preferred_editor=editor_data.get("preferred_editor")
+        )
+
+        return AppConfig(llm=llm_config, jira=jira_config, editor=editor_config)
+
+    def save_config(self, config_path: Optional[Path] = None) -> None:
+        """Save current configuration to file."""
+        if config_path is None:
+            # Save to global config by default
+            config_path = Path.home() / ".git-llm-tool" / "config.yaml"
+
+        # Ensure directory exists
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Convert config to dict
+        config_dict = {
+            "llm": {
+                "default_model": self._config.llm.default_model,
+                "language": self._config.llm.language,
+                "api_keys": self._config.llm.api_keys,
+                "azure_openai": self._config.llm.azure_openai,
+                "chunking_threshold": self._config.llm.chunking_threshold,
+                "use_ollama_for_chunks": self._config.llm.use_ollama_for_chunks,
+                "ollama_model": self._config.llm.ollama_model,
+                "ollama_base_url": self._config.llm.ollama_base_url
+            },
+            "jira": {
+                "enabled": self._config.jira.enabled,
+                "ticket_pattern": self._config.jira.ticket_pattern
+            },
+            "editor": {
+                "preferred_editor": self._config.editor.preferred_editor
+            }
+        }
+
+        # Remove empty sections to keep config clean
+        if not config_dict["llm"]["api_keys"]:
+            del config_dict["llm"]["api_keys"]
+        if not config_dict["llm"]["azure_openai"]:
+            del config_dict["llm"]["azure_openai"]
+
+        # Remove None values from jira config
+        if config_dict["jira"]["ticket_pattern"] is None:
+            del config_dict["jira"]["ticket_pattern"]
+
+        # Remove None values from editor config
+        if config_dict["editor"]["preferred_editor"] is None:
+            del config_dict["editor"]["preferred_editor"]
+        if not config_dict["editor"]:
+            del config_dict["editor"]
+
+        try:
+            with open(config_path, 'w', encoding='utf-8') as f:
+                yaml.dump(config_dict, f, default_flow_style=False, indent=2)
+        except Exception as e:
+            raise ConfigError(f"Failed to save config to {config_path}: {e}")
+
+    def set_value(self, key_path: str, value: str) -> None:
+        """Set a configuration value using dot notation (e.g., 'llm.default_model')."""
+        keys = key_path.split('.')
+
+        if len(keys) < 2:
+            raise ConfigError(f"Invalid key path: {key_path}")
+
+        # Handle llm.default_model
+        if keys[0] == "llm" and keys[1] == "default_model":
+            self._config.llm.default_model = value
+        # Handle llm.language
+        elif keys[0] == "llm" and keys[1] == "language":
+            self._config.llm.language = value
+        # Handle llm.use_ollama_for_chunks
+        elif keys[0] == "llm" and keys[1] == "use_ollama_for_chunks":
+            self._config.llm.use_ollama_for_chunks = value.lower() in ("true", "1", "yes", "on")
+        # Handle llm.ollama_model
+        elif keys[0] == "llm" and keys[1] == "ollama_model":
+            self._config.llm.ollama_model = value
+        # Handle llm.ollama_base_url
+        elif keys[0] == "llm" and keys[1] == "ollama_base_url":
+            self._config.llm.ollama_base_url = value
+        # Handle llm.api_keys.*
+        elif keys[0] == "llm" and keys[1] == "api_keys" and len(keys) == 3:
+            self._config.llm.api_keys[keys[2]] = value
+        # Handle llm.azure_openai.*
+        elif keys[0] == "llm" and keys[1] == "azure_openai" and len(keys) == 3:
+            self._config.llm.azure_openai[keys[2]] = value
+        # Handle jira.enabled
+        elif keys[0] == "jira" and keys[1] == "enabled":
+            self._config.jira.enabled = value.lower() in ("true", "1", "yes", "on")
+        # Handle jira.ticket_pattern
+        elif keys[0] == "jira" and keys[1] == "ticket_pattern":
+            self._config.jira.ticket_pattern = value
+        # Handle editor.preferred_editor
+        elif keys[0] == "editor" and keys[1] == "preferred_editor":
+            self._config.editor.preferred_editor = value
+        else:
+            raise ConfigError(f"Unknown configuration key: {key_path}")
+
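A short sketch of the dot-notation accessors. Note that set_value only mutates the in-memory config, so save_config() is needed to persist the change:

    loader = ConfigLoader()
    loader.set_value("llm.default_model", "gpt-4o")
    loader.set_value("jira.enabled", "true")     # string parsed to a boolean
    assert loader.get_value("jira.enabled") is True
    loader.save_config()  # writes ~/.git-llm-tool/config.yaml by default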
+    def get_value(self, key_path: str) -> Any:
+        """Get a configuration value using dot notation."""
+        keys = key_path.split('.')
+
+        if len(keys) < 2:
+            raise ConfigError(f"Invalid key path: {key_path}")
+
+        # Handle llm.default_model
+        if keys[0] == "llm" and keys[1] == "default_model":
+            return self._config.llm.default_model
+        # Handle llm.language
+        elif keys[0] == "llm" and keys[1] == "language":
+            return self._config.llm.language
+        # Handle llm.use_ollama_for_chunks
+        elif keys[0] == "llm" and keys[1] == "use_ollama_for_chunks":
+            return self._config.llm.use_ollama_for_chunks
+        # Handle llm.ollama_model
+        elif keys[0] == "llm" and keys[1] == "ollama_model":
+            return self._config.llm.ollama_model
+        # Handle llm.ollama_base_url
+        elif keys[0] == "llm" and keys[1] == "ollama_base_url":
+            return self._config.llm.ollama_base_url
+        # Handle llm.api_keys.*
+        elif keys[0] == "llm" and keys[1] == "api_keys" and len(keys) == 3:
+            return self._config.llm.api_keys.get(keys[2])
+        # Handle llm.azure_openai.*
+        elif keys[0] == "llm" and keys[1] == "azure_openai" and len(keys) == 3:
+            return self._config.llm.azure_openai.get(keys[2])
+        # Handle jira.enabled
+        elif keys[0] == "jira" and keys[1] == "enabled":
+            return self._config.jira.enabled
+        # Handle jira.ticket_pattern
+        elif keys[0] == "jira" and keys[1] == "ticket_pattern":
+            return self._config.jira.ticket_pattern
+        # Handle editor.preferred_editor
+        elif keys[0] == "editor" and keys[1] == "preferred_editor":
+            return self._config.editor.preferred_editor
+        else:
+            raise ConfigError(f"Unknown configuration key: {key_path}")
+
+    def reload(self) -> None:
+        """Reload configuration from files."""
+        self._config = self._load_config()
+
+    @classmethod
+    def _reset_instance(cls) -> None:
+        """Reset singleton instance for testing."""
+        cls._instance = None
+        cls._config = None
+
+
+def get_config() -> AppConfig:
+    """Get the application configuration."""
+    return ConfigLoader().config
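Downstream code would typically go through the module-level helper. A minimal sketch; the import path git_llm_tool.core.config is an assumption based on the package layout implied by the exceptions import above, and is not shown in this diff:

    from git_llm_tool.core.config import get_config  # assumed module path

    cfg = get_config()
    print(cfg.llm.default_model)       # "gpt-4o" unless overridden
    print(cfg.llm.chunking_threshold)  # 12000 by default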
@@ -0,0 +1,206 @@
+"""Diff optimization strategies to reduce token usage."""
+
+import re
+from typing import List, Tuple, Optional
+from dataclasses import dataclass
+
+
+@dataclass
+class DiffStats:
+    """Statistics about diff optimization."""
+    original_size: int
+    optimized_size: int
+    compression_ratio: float
+    files_processed: int
+    lines_removed: int
+
+
+class DiffOptimizer:
+    """Optimize git diffs to reduce token usage while preserving meaning."""
+
+    def __init__(self, max_context_lines: int = 3):
+        self.max_context_lines = max_context_lines
+
+    def optimize_diff(self, diff: str, aggressive: bool = False) -> Tuple[str, DiffStats]:
+        """Optimize diff to reduce token usage."""
+        original_size = len(diff)
+        lines = diff.split('\n')
+
+        optimized_lines = []
+        files_processed = 0
+        lines_removed = 0
+
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+
+            # Keep file headers
+            if line.startswith('diff --git'):
+                files_processed += 1
+                optimized_lines.append(line)
+                i += 1
+                continue
+
+            # Keep index and mode lines (compressed)
+            if line.startswith('index ') or line.startswith('new file mode') or line.startswith('deleted file mode'):
+                if not aggressive:
+                    optimized_lines.append(line)
+                else:
+                    lines_removed += 1
+                i += 1
+                continue
+
+            # Keep file paths but simplify
+            if line.startswith('--- ') or line.startswith('+++ '):
+                if aggressive:
+                    # Simplify path names
+                    simplified = re.sub(r'^(---|\+\+\+) [ab]/', r'\1 ', line)
+                    optimized_lines.append(simplified)
+                else:
+                    optimized_lines.append(line)
+                i += 1
+                continue
+
+            # Process hunk headers
+            if line.startswith('@@'):
+                optimized_lines.append(line)
+                i += 1
+
+                # Process the content of this hunk
+                hunk_lines, removed_count = self._process_hunk(lines[i:], aggressive)
+                optimized_lines.extend(hunk_lines)
+                lines_removed += removed_count
+                i += len(hunk_lines)
+                continue
+
+            # Keep other lines as-is
+            optimized_lines.append(line)
+            i += 1
+
+        optimized_diff = '\n'.join(optimized_lines)
+        optimized_size = len(optimized_diff)
+
+        stats = DiffStats(
+            original_size=original_size,
+            optimized_size=optimized_size,
+            compression_ratio=optimized_size / original_size if original_size > 0 else 1.0,
+            files_processed=files_processed,
+            lines_removed=lines_removed
+        )
+
+        return optimized_diff, stats
+
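A minimal usage sketch for optimize_diff; the sample diff string is illustrative, not taken from the package:

    optimizer = DiffOptimizer(max_context_lines=3)
    raw_diff = "diff --git a/app.py b/app.py\n@@ -1,3 +1,4 @@\n import os\n+import sys"
    optimized, stats = optimizer.optimize_diff(raw_diff, aggressive=True)
    # stats reports sizes, compression ratio, files processed and lines removed.
    print(stats.files_processed, round(stats.compression_ratio, 2))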
+    def _process_hunk(self, lines: List[str], aggressive: bool) -> Tuple[List[str], int]:
+        """Process a hunk to optimize context and changes."""
+        result = []
+        removed_count = 0
+
+        # Group consecutive context lines
+        context_buffer = []
+
+        for line in lines:
+            # Stop at next hunk or file
+            if line.startswith('@@') or line.startswith('diff --git'):
+                break
+
+            if line.startswith(' '):  # Context line
+                context_buffer.append(line)
+            else:  # Added or removed line
+                # Flush context buffer with limit
+                if context_buffer:
+                    if aggressive and len(context_buffer) > self.max_context_lines * 2:
+                        # Keep first and last few context lines
+                        keep_start = context_buffer[:self.max_context_lines]
+                        keep_end = context_buffer[-self.max_context_lines:]
+                        result.extend(keep_start)
+                        if len(context_buffer) > self.max_context_lines * 2:
+                            result.append(f' ... ({len(context_buffer) - len(keep_start) - len(keep_end)} context lines omitted)')
+                        result.extend(keep_end)
+                        removed_count += len(context_buffer) - len(keep_start) - len(keep_end)
+                    else:
+                        result.extend(context_buffer)
+                    context_buffer = []
+
+                # Keep change lines (but can compress whitespace-only changes)
+                if aggressive and self._is_whitespace_only_change(line):
+                    result.append(f'{line[0]} (whitespace change)')
+                    removed_count += 1
+                else:
+                    result.append(line)
+
+        # Handle remaining context
+        if context_buffer:
+            if aggressive and len(context_buffer) > self.max_context_lines:
+                result.extend(context_buffer[:self.max_context_lines])
+                result.append(f' ... ({len(context_buffer) - self.max_context_lines} trailing context lines omitted)')
+                removed_count += len(context_buffer) - self.max_context_lines
+            else:
+                result.extend(context_buffer)
+
+        return result, removed_count
+
+    def _is_whitespace_only_change(self, line: str) -> bool:
+        """Check if a line represents only whitespace changes."""
+        if len(line) < 2:
+            return False
+
+        content = line[1:]  # Remove +/- prefix
+        return content.strip() == '' or re.match(r'^\s+$', content)
+
+    def smart_truncate(self, diff: str, max_tokens: int = 8000) -> str:
+        """Smart truncation that preserves important parts of the diff."""
+        # Rough estimation: 1 token ≈ 4 characters
+        max_chars = max_tokens * 4
+
+        if len(diff) <= max_chars:
+            return diff
+
+        lines = diff.split('\n')
+        important_lines = []
+        regular_lines = []
+
+        for line in lines:
+            if self._is_important_line(line):
+                important_lines.append(line)
+            else:
+                regular_lines.append(line)
+
+        # Always keep important lines
+        result = important_lines[:]
+        current_size = sum(len(line) + 1 for line in result)  # +1 for newline
+
+        # Add regular lines until we hit the limit
+        for line in regular_lines:
+            if current_size + len(line) + 1 > max_chars:
+                result.append('... (diff truncated to fit token limit)')
+                break
+            result.append(line)
+            current_size += len(line) + 1
+
+        return '\n'.join(result)
+
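smart_truncate budgets by characters rather than real tokens: with the 4-characters-per-token heuristic above, the default max_tokens=8000 gives a 32,000-character budget, and shorter diffs pass through unchanged. A minimal sketch:

    optimizer = DiffOptimizer()
    small_diff = "diff --git a/x b/x\n+hello"
    # Well under the 32,000-character budget, so it is returned untouched.
    assert optimizer.smart_truncate(small_diff, max_tokens=8000) == small_diff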
+    def _is_important_line(self, line: str) -> bool:
+        """Determine if a line is important and should be preserved."""
+        return (
+            line.startswith('diff --git') or
+            line.startswith('+++') or
+            line.startswith('---') or
+            line.startswith('@@') or
+            (line.startswith('+') and not self._is_whitespace_only_change(line)) or
+            (line.startswith('-') and not self._is_whitespace_only_change(line))
+        )
+
+    def get_summary_stats(self, diff: str) -> dict:
+        """Get summary statistics about a diff."""
+        lines = diff.split('\n')
+
+        stats = {
+            'total_lines': len(lines),
+            'files_changed': len([l for l in lines if l.startswith('diff --git')]),
+            'lines_added': len([l for l in lines if l.startswith('+') and not l.startswith('+++')]),
+            'lines_removed': len([l for l in lines if l.startswith('-') and not l.startswith('---')]),
+            'context_lines': len([l for l in lines if l.startswith(' ')]),
+            'estimated_tokens': len(diff) // 4  # Rough estimation
+        }
+
+        return stats
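get_summary_stats does simple line-based counting; an illustrative call:

    optimizer = DiffOptimizer()
    stats = optimizer.get_summary_stats("diff --git a/x b/x\n--- a/x\n+++ b/x\n@@ -1 +1 @@\n-old\n+new")
    # stats["files_changed"] == 1, stats["lines_added"] == 1, stats["lines_removed"] == 1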
@@ -0,0 +1,26 @@
+"""Custom exceptions for git-llm-tool."""
+
+
+class GitLlmError(Exception):
+    """Base exception for git-llm-tool."""
+    pass
+
+
+class ConfigError(GitLlmError):
+    """Configuration-related errors."""
+    pass
+
+
+class GitError(GitLlmError):
+    """Git operation errors."""
+    pass
+
+
+class ApiError(GitLlmError):
+    """LLM API-related errors."""
+    pass
+
+
+class JiraError(GitLlmError):
+    """Jira integration errors."""
+    pass
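All errors share the GitLlmError base class, so callers can catch one failure mode or the whole family. A small sketch; the import path matches the one used by the config module above:

    from git_llm_tool.core.exceptions import ConfigError, GitLlmError

    try:
        raise ConfigError("missing API key")
    except GitLlmError as e:
        # ConfigError is caught here because it subclasses GitLlmError.
        print(f"git-llm-tool error: {e}")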