gac 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gac might be problematic. Click here for more details.
- gac/__init__.py +15 -0
- gac/__version__.py +3 -0
- gac/ai.py +166 -0
- gac/cli.py +130 -0
- gac/config.py +32 -0
- gac/config_cli.py +62 -0
- gac/constants.py +149 -0
- gac/diff_cli.py +177 -0
- gac/errors.py +217 -0
- gac/git.py +158 -0
- gac/init_cli.py +45 -0
- gac/main.py +254 -0
- gac/preprocess.py +506 -0
- gac/prompt.py +355 -0
- gac/utils.py +133 -0
- {gac-0.15.1.dist-info → gac-0.15.3.dist-info}/METADATA +4 -4
- gac-0.15.3.dist-info/RECORD +20 -0
- gac-0.15.1.dist-info/RECORD +0 -5
- {gac-0.15.1.dist-info → gac-0.15.3.dist-info}/WHEEL +0 -0
- {gac-0.15.1.dist-info → gac-0.15.3.dist-info}/entry_points.txt +0 -0
- {gac-0.15.1.dist-info → gac-0.15.3.dist-info}/licenses/LICENSE +0 -0
gac/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Git Auto Commit (gac) - Generate commit messages using AI."""
|
|
2
|
+
|
|
3
|
+
from gac.__version__ import __version__
|
|
4
|
+
from gac.ai import generate_commit_message
|
|
5
|
+
from gac.git import get_staged_files, push_changes
|
|
6
|
+
from gac.prompt import build_prompt, clean_commit_message
|
|
7
|
+
|
|
8
|
+
# Public API of the package: each name listed here is imported at the top of
# this module and re-exported.
__all__ = [
    "__version__",
    "generate_commit_message",
    "build_prompt",
    "clean_commit_message",
    "get_staged_files",
    "push_changes",
]
|
gac/__version__.py
ADDED
gac/ai.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""AI provider integration for gac.
|
|
2
|
+
|
|
3
|
+
This module provides core functionality for AI provider interaction.
|
|
4
|
+
It consolidates all AI-related functionality including token counting and commit message generation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
from functools import lru_cache
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import aisuite as ai
|
|
13
|
+
import tiktoken
|
|
14
|
+
from halo import Halo
|
|
15
|
+
|
|
16
|
+
from gac.constants import EnvDefaults, Utility
|
|
17
|
+
from gac.errors import AIError
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def count_tokens(content: str | list[dict[str, str]] | dict[str, Any], model: str) -> int:
    """Count tokens in content using the model's tokenizer.

    Args:
        content: A plain string, a list of message dicts, or a single message
            dict. The text is pulled out with ``extract_text_content``.
        model: Model identifier, optionally prefixed with the provider
            (e.g. ``"anthropic:claude-3-5-haiku-latest"``).

    Returns:
        The token count as an ``int``. Empty content yields 0. If counting
        fails for any reason, a rough estimate of one token per four
        characters is returned instead.
    """
    text = extract_text_content(content)
    if not text:
        return 0

    try:
        if model.startswith("anthropic"):
            import anthropic

            # Strip the "provider:" prefix so the SDK receives the bare model name.
            model_name = model.split(":", 1)[-1]
            result = anthropic.Client().messages.count_tokens(
                model=model_name,
                messages=[{"role": "user", "content": text}],
            )
            # The SDK answers with a response object carrying ``input_tokens``;
            # tolerate a plain number as well so the return type is always int.
            return int(getattr(result, "input_tokens", result))

        encoding = get_encoding(model)
        return len(encoding.encode(text))
    except Exception as e:
        # Counting is best-effort for every provider: log and fall back to an estimate.
        logger.error(f"Error counting tokens: {e}")
        return len(text) // 4
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def extract_text_content(content: str | list[dict[str, str]] | dict[str, Any]) -> str:
|
|
45
|
+
"""Extract text content from various input formats."""
|
|
46
|
+
if isinstance(content, str):
|
|
47
|
+
return content
|
|
48
|
+
elif isinstance(content, list):
|
|
49
|
+
return "\n".join(msg["content"] for msg in content if isinstance(msg, dict) and "content" in msg)
|
|
50
|
+
elif isinstance(content, dict) and "content" in content:
|
|
51
|
+
return content["content"]
|
|
52
|
+
return ""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@lru_cache(maxsize=1)
def get_encoding(model: str) -> tiktoken.Encoding:
    """Look up the tiktoken encoding for ``model``.

    Only the part after the last ``:`` is handed to tiktoken. When tiktoken
    raises ``KeyError`` for that name, the encoding named by
    ``Utility.DEFAULT_ENCODING`` is used instead. The most recent result is
    memoized by ``lru_cache``.
    """
    # rpartition leaves the whole string in the tail when there is no ":".
    bare_name = model.rpartition(":")[2]
    try:
        encoding = tiktoken.encoding_for_model(bare_name)
    except KeyError:
        encoding = tiktoken.get_encoding(Utility.DEFAULT_ENCODING)
    return encoding
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def generate_commit_message(
    model: str,
    prompt: str,
    temperature: float = EnvDefaults.TEMPERATURE,
    max_tokens: int = EnvDefaults.MAX_OUTPUT_TOKENS,
    max_retries: int = EnvDefaults.MAX_RETRIES,
    quiet: bool = False,
) -> str:
    """Generate a commit message using aisuite.

    The request is attempted up to ``max_retries`` times, waiting ``2**attempt``
    seconds between failed attempts.

    Args:
        model: The model to use in provider:model_name format (e.g., 'anthropic:claude-3-5-haiku-latest')
        prompt: The formatted prompt containing diff and context
        temperature: Controls randomness (0.0-1.0), lower values are more deterministic
        max_tokens: Maximum tokens in the response
        max_retries: Number of retry attempts if generation fails
        quiet: If True, suppress progress indicators

    Returns:
        A formatted commit message string

    Raises:
        AIError: If the model string is not in provider:model_name format, or
            if generation fails after max_retries attempts. In the latter case
            the last underlying exception is attached as ``__cause__``.

    Example:
        >>> model = "anthropic:claude-3-5-haiku-latest"
        >>> prompt = build_prompt("On branch main", "diff --git a/README.md b/README.md")
        >>> generate_commit_message(model, prompt)
        'docs: Update README with installation instructions'
    """
    # Unpacking into two names fails with ValueError when there is no ":" separator.
    try:
        _, _ = model.split(":", 1)
    except ValueError as err:
        raise AIError.model_error(
            f"Invalid model format: {model}. Please use the format 'provider:model_name'."
        ) from err

    client = ai.Client()

    spinner = None
    if not quiet:
        spinner = Halo(text=f"Generating commit message with {model}...", spinner="dots")
        spinner.start()

    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            logger.debug(f"Trying with model {model} (attempt {attempt}/{max_retries})")
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=max_tokens,
            )

            message = response.choices[0].message.content if hasattr(response, "choices") else response.content

            if spinner:
                spinner.succeed(f"Generated commit message with {model}")

            return message

        except Exception as e:
            last_error = e

            if attempt == max_retries:
                logger.warning(f"Error generating commit message: {e}. Giving up.")
                break

            # Exponential backoff: 2s, 4s, 8s, ...
            wait_time = 2**attempt
            logger.warning(f"Error generating commit message: {e}. Retrying in {wait_time}s...")
            if spinner:
                # Count down one second at a time so the spinner text stays current.
                for i in range(wait_time, 0, -1):
                    spinner.text = f"Retry {attempt}/{max_retries} in {i}s..."
                    time.sleep(1)
            else:
                time.sleep(wait_time)

    if spinner:
        spinner.fail("Failed to generate commit message")

    # Chain the original exception so its traceback is not lost.
    raise AIError(
        f"Failed to generate commit message after {max_retries} attempts: {last_error}",
        error_type=_classify_ai_error(last_error),
    ) from last_error


def _classify_ai_error(error: Exception | None) -> str:
    """Map an exception's message to the label passed as AIError's ``error_type``."""
    error_str = str(error).lower()
    # Order matters: the first group with a matching keyword wins.
    keyword_groups = (
        ("authentication", ("api key", "unauthorized", "authentication")),
        ("timeout", ("timeout",)),
        ("rate_limit", ("rate limit", "too many requests")),
        ("connection", ("connect", "network")),
        ("model", ("model", "not found")),
    )
    for label, keywords in keyword_groups:
        if any(keyword in error_str for keyword in keywords):
            return label
    return "unknown"
|
gac/cli.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# flake8: noqa: E304
|
|
2
|
+
|
|
3
|
+
"""CLI entry point for gac.
|
|
4
|
+
|
|
5
|
+
Defines the Click-based command-line interface and delegates execution to the main workflow.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
|
|
13
|
+
from gac import __version__
|
|
14
|
+
from gac.config import load_config
|
|
15
|
+
from gac.config_cli import config as config_cli
|
|
16
|
+
from gac.constants import Logging
|
|
17
|
+
from gac.diff_cli import diff as diff_cli
|
|
18
|
+
from gac.errors import handle_error
|
|
19
|
+
from gac.init_cli import init as init_cli
|
|
20
|
+
from gac.main import main
|
|
21
|
+
from gac.utils import setup_logging
|
|
22
|
+
|
|
23
|
+
# Configuration is loaded once at import time; its "log_level" entry is used
# as a default in the Click option declarations below.
config = load_config()
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@click.group(invoke_without_command=True, context_settings={"ignore_unknown_options": True})
# Git workflow options
@click.option("--add-all", "-a", is_flag=True, help="Stage all changes before committing")
@click.option("--push", "-p", is_flag=True, help="Push changes to remote after committing")
@click.option("--dry-run", is_flag=True, help="Dry run the commit workflow")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
# Commit message options
@click.option("--one-liner", "-o", is_flag=True, help="Generate a single-line commit message")
@click.option("--show-prompt", is_flag=True, help="Show the prompt sent to the LLM")
@click.option(
    "--scope",
    "-s",
    is_flag=False,
    flag_value="",
    default=None,
    help="Add a scope to the commit message. If used without a value, the LLM will determine an appropriate scope.",
)
@click.option("--hint", "-h", default="", help="Additional context to include in the prompt")
# Model options
@click.option("--model", "-m", help="Override the default model (format: 'provider:model_name')")
# Output options
@click.option("--quiet", "-q", is_flag=True, help="Suppress non-error output")
@click.option("--verbose", "-v", is_flag=True, help="Increase output verbosity to INFO")
@click.option(
    "--log-level",
    default=config["log_level"],
    type=click.Choice(Logging.LEVELS, case_sensitive=False),
    help=f"Set log level (default: {config['log_level']})",
)
# Advanced options
@click.option("--no-verify", is_flag=True, help="Skip pre-commit hooks when committing")
# Other options
@click.option("--version", is_flag=True, help="Show the version of the Git Auto Commit (gac) tool")
@click.pass_context
def cli(
    ctx: click.Context,
    add_all: bool = False,
    log_level: str = config["log_level"],
    one_liner: bool = False,
    push: bool = False,
    show_prompt: bool = False,
    scope: str = None,
    quiet: bool = False,
    yes: bool = False,
    hint: str = "",
    model: str = None,
    version: bool = False,
    dry_run: bool = False,
    verbose: bool = False,
    no_verify: bool = False,
) -> None:
    """Git Auto Commit - Generate commit messages with AI."""
    # NOTE: the docstring above doubles as the Click help text; keep it short.
    if ctx.invoked_subcommand is not None:
        # A subcommand was invoked: only record the group-level options on the
        # context object and do nothing else here.
        ctx.obj = dict(
            add_all=add_all,
            log_level=log_level,
            one_liner=one_liner,
            push=push,
            show_prompt=show_prompt,
            scope=scope,
            quiet=quiet,
            yes=yes,
            hint=hint,
            model=model,
            version=version,
            dry_run=dry_run,
            verbose=verbose,
            no_verify=no_verify,
        )
        return

    if version:
        print(f"Git Auto Commit (gac) version: {__version__}")
        sys.exit(0)

    # --quiet takes precedence over --verbose; --verbose only raises the level
    # when the requested one is less chatty than INFO.
    if quiet:
        effective_log_level = "ERROR"
    elif verbose and log_level not in ("DEBUG", "INFO"):
        effective_log_level = "INFO"
    else:
        effective_log_level = log_level

    setup_logging(effective_log_level)
    logger.info("Starting gac")

    try:
        main(
            stage_all=add_all,
            model=model,
            hint=hint,
            one_liner=one_liner,
            show_prompt=show_prompt,
            scope=scope,
            require_confirmation=not yes,
            push=push,
            quiet=quiet,
            dry_run=dry_run,
            no_verify=no_verify,
        )
    except Exception as e:
        handle_error(e, exit_program=True)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# Register the subcommands imported at the top of this module on the main group.
cli.add_command(config_cli)
cli.add_command(init_cli)
cli.add_command(diff_cli)

# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
|
gac/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Configuration loading for gac.
|
|
2
|
+
|
|
3
|
+
Handles environment variable and .env file precedence for application settings.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
|
|
11
|
+
from gac.constants import EnvDefaults, Logging
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_config() -> dict[str, str | int | float | bool]:
    """Build the gac settings dict from dotenv files and the environment.

    ``$HOME/.gac.env`` is loaded first (if it exists), then ``./.env`` (if it
    exists) with ``override=True``. The settings are then read from the
    ``GAC_*`` environment variables, falling back to the defaults in
    ``EnvDefaults`` and ``Logging``.

    Returns:
        A dict with the keys ``model``, ``temperature``, ``max_output_tokens``,
        ``max_retries``, ``log_level`` and ``warning_limit_tokens``.
    """
    home_env = Path.home() / ".gac.env"
    if home_env.exists():
        load_dotenv(home_env)

    local_env = Path(".env")
    if local_env.exists():
        load_dotenv(local_env, override=True)

    env = os.getenv
    model = env("GAC_MODEL")
    temperature = float(env("GAC_TEMPERATURE", EnvDefaults.TEMPERATURE))
    max_output_tokens = int(env("GAC_MAX_OUTPUT_TOKENS", EnvDefaults.MAX_OUTPUT_TOKENS))
    max_retries = int(env("GAC_RETRIES", EnvDefaults.MAX_RETRIES))
    log_level = env("GAC_LOG_LEVEL", Logging.DEFAULT_LEVEL)
    warning_limit_tokens = int(env("GAC_WARNING_LIMIT_TOKENS", EnvDefaults.WARNING_LIMIT_TOKENS))

    return {
        "model": model,
        "temperature": temperature,
        "max_output_tokens": max_output_tokens,
        "max_retries": max_retries,
        "log_level": log_level,
        "warning_limit_tokens": warning_limit_tokens,
    }
|
gac/config_cli.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""CLI for managing gac configuration in $HOME/.gac.env."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
from dotenv import load_dotenv, set_key
|
|
8
|
+
|
|
9
|
+
# Per-user dotenv file that the `config` subcommands below read and modify.
GAC_ENV_PATH = Path.home() / ".gac.env"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.group()
def config():
    """Manage gac configuration."""
    # Container group only: the subcommands attached to it do the work.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@config.command()
def show() -> None:
    """Show all current config values."""
    # Prints the raw lines of $HOME/.gac.env, or a notice when the file is missing.
    if not GAC_ENV_PATH.exists():
        click.echo("No $HOME/.gac.env found.")
        return
    load_dotenv(GAC_ENV_PATH, override=True)
    # python-dotenv writes the file as UTF-8 by default, so read it back the
    # same way instead of relying on the platform's locale encoding.
    with open(GAC_ENV_PATH, encoding="utf-8") as f:
        for line in f:
            click.echo(line.rstrip())
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@config.command()
@click.argument("key")
@click.argument("value")
def set(key: str, value: str) -> None:
    """Set a config KEY to VALUE in $HOME/.gac.env."""
    env_file = GAC_ENV_PATH
    # Make sure the file exists before handing its path to set_key.
    env_file.touch()
    set_key(os.fspath(env_file), key, value)
    click.echo(f"Set {key} in $HOME/.gac.env")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@config.command()
@click.argument("key")
def get(key: str) -> None:
    """Get a config value by KEY."""
    # Values from the user file are loaded into the environment first, then
    # the lookup goes through the process environment.
    load_dotenv(GAC_ENV_PATH, override=True)
    current = os.environ.get(key)
    click.echo(f"{key} not set." if current is None else current)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@config.command()
@click.argument("key")
def unset(key: str) -> None:
    """Remove a config KEY from $HOME/.gac.env."""
    if not GAC_ENV_PATH.exists():
        click.echo("No $HOME/.gac.env found.")
        return

    def _assigned_name(line: str) -> str:
        # Name left of "=", ignoring an optional "export " prefix and any
        # whitespace around the name, so hand-edited lines are matched too.
        return line.strip().removeprefix("export ").split("=", 1)[0].strip()

    # python-dotenv writes UTF-8 by default; read and write with the same encoding.
    lines = GAC_ENV_PATH.read_text(encoding="utf-8").splitlines()
    kept = [line for line in lines if _assigned_name(line) != key]
    # Terminate every kept line with a newline; an empty result leaves an empty file.
    GAC_ENV_PATH.write_text("".join(f"{line}\n" for line in kept), encoding="utf-8")
    click.echo(f"Unset {key} in $HOME/.gac.env")
|
gac/constants.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Constants for the Git Auto Commit (gac) project."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from re import Pattern
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FileStatus(Enum):
    """File status for Git operations."""

    # Each member's value is a one-character status code string
    # (presumably the letters git prints for a file's status; confirm against callers).
    MODIFIED = "M"
    ADDED = "A"
    DELETED = "D"
    RENAMED = "R"
    COPIED = "C"
    UNTRACKED = "?"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EnvDefaults:
    """Default values for environment variables."""

    # Fallbacks used when the corresponding GAC_* environment variable is not set.
    MAX_RETRIES: int = 3  # attempts before giving up on generation
    TEMPERATURE: float = 1  # sampling temperature passed to the model
    MAX_OUTPUT_TOKENS: int = 512  # cap on tokens in the model response
    WARNING_LIMIT_TOKENS: int = 16384  # NOTE(review): presumably the prompt size that triggers a warning; confirm
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Logging:
    """Logging configuration constants."""

    # Level used when no log level is configured.
    DEFAULT_LEVEL: str = "WARNING"
    # Level names accepted for the log level setting.
    LEVELS: list[str] = ["DEBUG", "INFO", "WARNING", "ERROR"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Utility:
    """General utility constants."""

    DEFAULT_ENCODING: str = "cl100k_base"  # llm encoding
    DEFAULT_DIFF_TOKEN_LIMIT: int = 15000  # Maximum tokens for diff processing
    # os.cpu_count() may return None, in which case 4 is used.
    MAX_WORKERS: int = os.cpu_count() or 4  # Maximum number of parallel workers
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class FilePatterns:
    """Patterns for identifying special file types."""

    # Regex pattern strings (not compiled) that detect binary file changes in
    # git diffs (e.g., images or other non-text files)
    BINARY: list[str] = [
        r"Binary files .* differ",
        r"GIT binary patch",
    ]

    # Literal filename suffixes of minified or bundled files (e.g., JavaScript or CSS files)
    MINIFIED_EXTENSIONS: list[str] = [
        ".min.js",
        ".min.css",
        ".bundle.js",
        ".bundle.css",
        ".compressed.js",
        ".compressed.css",
        ".opt.js",
        ".opt.css",
    ]

    # Literal path fragments of build/vendor directories (e.g., dist, build, vendor, etc.)
    BUILD_DIRECTORIES: list[str] = [
        "/dist/",
        "/build/",
        "/vendor/",
        "/node_modules/",
        "/assets/vendor/",
        "/public/build/",
        "/static/dist/",
    ]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class FileTypeImportance:
    """Importance multipliers for different file types."""

    # Maps a file extension, file name, or path fragment to its multiplier.
    # Note that the "Build & CI" keys are names/paths rather than extensions.
    EXTENSIONS: dict[str, float] = {
        # Programming languages
        ".py": 5.0,  # Python
        ".js": 4.5,  # JavaScript
        ".ts": 4.5,  # TypeScript
        ".jsx": 4.8,  # React JS
        ".tsx": 4.8,  # React TS
        ".go": 4.5,  # Go
        ".rs": 4.5,  # Rust
        ".java": 4.2,  # Java
        ".c": 4.2,  # C
        ".h": 4.2,  # C/C++ header
        ".cpp": 4.2,  # C++
        ".rb": 4.2,  # Ruby
        ".php": 4.0,  # PHP
        ".scala": 4.0,  # Scala
        ".swift": 4.0,  # Swift
        ".kt": 4.0,  # Kotlin
        # Configuration
        ".json": 3.5,  # JSON config
        ".yaml": 3.8,  # YAML config
        ".yml": 3.8,  # YAML config
        ".toml": 3.8,  # TOML config
        ".ini": 3.5,  # INI config
        ".env": 3.5,  # Environment variables
        # Documentation
        ".md": 4.0,  # Markdown
        ".rst": 3.8,  # reStructuredText
        # Web
        ".html": 3.5,  # HTML
        ".css": 3.5,  # CSS
        ".scss": 3.5,  # SCSS
        ".svg": 2.5,  # SVG graphics
        # Build & CI
        "Dockerfile": 4.0,  # Docker
        ".github/workflows": 4.0,  # GitHub Actions
        "CMakeLists.txt": 3.8,  # CMake
        "Makefile": 3.8,  # Make
        "package.json": 4.2,  # NPM package
        "pyproject.toml": 4.2,  # Python project
        "requirements.txt": 4.0,  # Python requirements
    }
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class CodePatternImportance:
    """Importance multipliers for different code patterns."""

    # Regex pattern strings (not compiled) that detect code structure changes
    # in git diffs (e.g., class, function, import), mapped to their multiplier.
    # Note: The patterns are prefixed with "+" to match only added and modified lines
    PATTERNS: dict[str, float] = {
        # Structure changes
        r"\+\s*(class|interface|enum)\s+\w+": 1.8,  # Class/interface/enum definitions
        r"\+\s*(def|function|func)\s+\w+\s*\(": 1.5,  # Function definitions
        r"\+\s*(import|from .* import)": 1.3,  # Imports
        r"\+\s*(public|private|protected)\s+\w+": 1.2,  # Access modifiers
        # Configuration changes
        r"\+\s*\"(dependencies|devDependencies)\"": 1.4,  # Package dependencies
        r"\+\s*version[\"\s:=]+[0-9.]+": 1.3,  # Version changes
        # Logic changes
        r"\+\s*(if|else|elif|switch|case|for|while)[\s(]": 1.2,  # Control structures
        r"\+\s*(try|catch|except|finally)[\s:]": 1.2,  # Exception handling
        r"\+\s*return\s+": 1.1,  # Return statements
        r"\+\s*await\s+": 1.1,  # Async/await
        # Comments & docs
        r"\+\s*(//|#|/\*|\*\*)\s*TODO": 1.2,  # TODOs
        r"\+\s*(//|#|/\*|\*\*)\s*FIX": 1.3,  # FIXes
        r"\+\s*(\"\"\"|\'\'\')": 1.1,  # Docstrings
        # Test code
        r"\+\s*(test|describe|it|should)\s*\(": 1.1,  # Test definitions
        r"\+\s*(assert|expect)": 1.0,  # Assertions
    }
|