gac 1.2.6__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gac might be problematic. Click here for more details.

Files changed (30) hide show
  1. {gac-1.2.6 → gac-1.3.1}/.gitignore +1 -0
  2. {gac-1.2.6 → gac-1.3.1}/PKG-INFO +26 -1
  3. {gac-1.2.6 → gac-1.3.1}/README.md +25 -0
  4. {gac-1.2.6 → gac-1.3.1}/src/gac/__version__.py +1 -1
  5. {gac-1.2.6 → gac-1.3.1}/src/gac/ai_utils.py +1 -1
  6. {gac-1.2.6 → gac-1.3.1}/src/gac/cli.py +4 -0
  7. {gac-1.2.6 → gac-1.3.1}/src/gac/config.py +2 -0
  8. {gac-1.2.6 → gac-1.3.1}/src/gac/constants.py +2 -0
  9. {gac-1.2.6 → gac-1.3.1}/src/gac/errors.py +9 -0
  10. {gac-1.2.6 → gac-1.3.1}/src/gac/init_cli.py +10 -0
  11. {gac-1.2.6 → gac-1.3.1}/src/gac/main.py +69 -7
  12. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/openrouter.py +15 -1
  13. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/zai.py +7 -1
  14. gac-1.3.1/src/gac/security.py +293 -0
  15. {gac-1.2.6 → gac-1.3.1}/LICENSE +0 -0
  16. {gac-1.2.6 → gac-1.3.1}/pyproject.toml +0 -0
  17. {gac-1.2.6 → gac-1.3.1}/src/gac/__init__.py +0 -0
  18. {gac-1.2.6 → gac-1.3.1}/src/gac/ai.py +0 -0
  19. {gac-1.2.6 → gac-1.3.1}/src/gac/config_cli.py +0 -0
  20. {gac-1.2.6 → gac-1.3.1}/src/gac/diff_cli.py +0 -0
  21. {gac-1.2.6 → gac-1.3.1}/src/gac/git.py +0 -0
  22. {gac-1.2.6 → gac-1.3.1}/src/gac/preprocess.py +0 -0
  23. {gac-1.2.6 → gac-1.3.1}/src/gac/prompt.py +0 -0
  24. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/__init__.py +0 -0
  25. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/anthropic.py +0 -0
  26. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/cerebras.py +0 -0
  27. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/groq.py +0 -0
  28. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/ollama.py +0 -0
  29. {gac-1.2.6 → gac-1.3.1}/src/gac/providers/openai.py +0 -0
  30. {gac-1.2.6 → gac-1.3.1}/src/gac/utils.py +0 -0
@@ -210,3 +210,4 @@ scripts/changelog_prompt.md
210
210
  .plandex-v2/
211
211
  .vscode/
212
212
  .serena/
213
+ .crush.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gac
3
- Version: 1.2.6
3
+ Version: 1.3.1
4
4
  Summary: AI-powered Git commit message generator with multi-provider support
5
5
  Project-URL: Homepage, https://github.com/cellwebb/gac
6
6
  Project-URL: Documentation, https://github.com/cellwebb/gac#readme
@@ -62,6 +62,7 @@ Description-Content-Type: text/markdown
62
62
  - **Streamlined Workflow Commands:** Boost your productivity with convenient options to stage all changes (`-a`), auto-confirm commits (`-y`), and push to your remote repository (`-p`) in a single step.
63
63
  - **Interactive Reroll with Feedback:** Not satisfied with the generated commit message? Use `r` for a simple regeneration, or `r <feedback>` to provide specific improvement suggestions (e.g., `r make it shorter`, `r focus on the bug fix`).
64
64
  - **Token Usage Tracking:** Display token consumption statistics (prompt, completion, and total tokens).
65
+ - **Security Scanner:** Built-in detection of secrets and API keys in staged changes to prevent accidental commits of sensitive information.
65
66
 
66
67
  ## How It Works
67
68
 
@@ -200,6 +201,30 @@ Once installed and configured, using `gac` is straightforward:
200
201
  - Add a hint for the AI: `gac -h "Fixed the authentication bug"`
201
202
  - Push the commit (requires accepting the commit message): `gac -p`
202
203
  - Advanced usage: Add all, auto-confirm, push a one-liner with a hint: `gac -aypo -h "update for release"`
204
+ - Skip security scan: `gac --skip-secret-scan` (use with caution)
205
+
206
+ ### Security Features
207
+
208
+ GAC includes a built-in security scanner to prevent accidental commits of sensitive information:
209
+
210
+ - **Automatic Scanning**: By default, scans all staged changes for potential secrets and API keys
211
+ - **Interactive Protection**: When secrets are detected, you can:
212
+   - Abort the commit (recommended)
213
+   - Continue anyway (not recommended)
214
+   - Remove affected files and continue
215
+ - **Wide Coverage**: Detects AWS keys, GitHub tokens, OpenAI API keys, database URLs, private keys, and more
216
+ - **Smart Filtering**: Ignores example keys, placeholders, and test values to reduce false positives
217
+
218
+ To disable the security scan (not recommended unless you know what you're doing):
219
+
220
+ ```sh
221
+ # Skip scan for one command
222
+ gac --skip-secret-scan
223
+
224
+ # Or disable via environment variable
225
+ export GAC_SKIP_SECRET_SCAN=true
226
+ gac
227
+ ```
203
228
 
204
229
  For a full list of CLI flags, advanced options, and example workflows, see [USAGE.md](USAGE.md).
205
230
 
@@ -20,6 +20,7 @@
20
20
  - **Streamlined Workflow Commands:** Boost your productivity with convenient options to stage all changes (`-a`), auto-confirm commits (`-y`), and push to your remote repository (`-p`) in a single step.
21
21
  - **Interactive Reroll with Feedback:** Not satisfied with the generated commit message? Use `r` for a simple regeneration, or `r <feedback>` to provide specific improvement suggestions (e.g., `r make it shorter`, `r focus on the bug fix`).
22
22
  - **Token Usage Tracking:** Display token consumption statistics (prompt, completion, and total tokens).
23
+ - **Security Scanner:** Built-in detection of secrets and API keys in staged changes to prevent accidental commits of sensitive information.
23
24
 
24
25
  ## How It Works
25
26
 
@@ -158,6 +159,30 @@ Once installed and configured, using `gac` is straightforward:
158
159
  - Add a hint for the AI: `gac -h "Fixed the authentication bug"`
159
160
  - Push the commit (requires accepting the commit message): `gac -p`
160
161
  - Advanced usage: Add all, auto-confirm, push a one-liner with a hint: `gac -aypo -h "update for release"`
162
+ - Skip security scan: `gac --skip-secret-scan` (use with caution)
163
+
164
+ ### Security Features
165
+
166
+ GAC includes a built-in security scanner to prevent accidental commits of sensitive information:
167
+
168
+ - **Automatic Scanning**: By default, scans all staged changes for potential secrets and API keys
169
+ - **Interactive Protection**: When secrets are detected, you can:
170
+   - Abort the commit (recommended)
171
+   - Continue anyway (not recommended)
172
+   - Remove affected files and continue
173
+ - **Wide Coverage**: Detects AWS keys, GitHub tokens, OpenAI API keys, database URLs, private keys, and more
174
+ - **Smart Filtering**: Ignores example keys, placeholders, and test values to reduce false positives
175
+
176
+ To disable the security scan (not recommended unless you know what you're doing):
177
+
178
+ ```sh
179
+ # Skip scan for one command
180
+ gac --skip-secret-scan
181
+
182
+ # Or disable via environment variable
183
+ export GAC_SKIP_SECRET_SCAN=true
184
+ gac
185
+ ```
161
186
 
162
187
  For a full list of CLI flags, advanced options, and example workflows, see [USAGE.md](USAGE.md).
163
188
 
@@ -1,3 +1,3 @@
1
1
  """Version information for gac package."""
2
2
 
3
- __version__ = "1.2.6"
3
+ __version__ = "1.3.1"
@@ -93,7 +93,7 @@ def generate_with_retries(
93
93
  provider, model_name = model.split(":", 1)
94
94
 
95
95
  # Validate provider
96
- supported_providers = ["anthropic", "openai", "groq", "cerebras", "ollama", "openrouter"]
96
+ supported_providers = ["anthropic", "openai", "groq", "cerebras", "ollama", "openrouter", "zai"]
97
97
  if provider not in supported_providers:
98
98
  raise AIError.model_error(f"Unsupported provider: {provider}. Supported providers: {supported_providers}")
99
99
 
@@ -54,6 +54,7 @@ logger = logging.getLogger(__name__)
54
54
  )
55
55
  # Advanced options
56
56
  @click.option("--no-verify", is_flag=True, help="Skip pre-commit hooks when committing")
57
+ @click.option("--skip-secret-scan", is_flag=True, help="Skip security scan for secrets in staged changes")
57
58
  # Other options
58
59
  @click.option("--version", is_flag=True, help="Show the version of the Git Auto Commit (gac) tool")
59
60
  @click.pass_context
@@ -73,6 +74,7 @@ def cli(
73
74
  dry_run: bool = False,
74
75
  verbose: bool = False,
75
76
  no_verify: bool = False,
77
+ skip_secret_scan: bool = False,
76
78
  ) -> None:
77
79
  """Git Auto Commit - Generate commit messages with AI."""
78
80
  if ctx.invoked_subcommand is None:
@@ -103,6 +105,7 @@ def cli(
103
105
  quiet=quiet,
104
106
  dry_run=dry_run,
105
107
  no_verify=no_verify,
108
+ skip_secret_scan=skip_secret_scan or bool(config.get("skip_secret_scan", False)),
106
109
  )
107
110
  except Exception as e:
108
111
  handle_error(e, exit_program=True)
@@ -125,6 +128,7 @@ def cli(
125
128
  "dry_run": dry_run,
126
129
  "verbose": verbose,
127
130
  "no_verify": no_verify,
131
+ "skip_secret_scan": skip_secret_scan,
128
132
  }
129
133
 
130
134
 
@@ -35,6 +35,8 @@ def load_config() -> dict[str, str | int | float | bool]:
35
35
  "warning_limit_tokens": int(os.getenv("GAC_WARNING_LIMIT_TOKENS", EnvDefaults.WARNING_LIMIT_TOKENS)),
36
36
  "always_include_scope": os.getenv("GAC_ALWAYS_INCLUDE_SCOPE", str(EnvDefaults.ALWAYS_INCLUDE_SCOPE)).lower()
37
37
  in ("true", "1", "yes", "on"),
38
+ "skip_secret_scan": os.getenv("GAC_SKIP_SECRET_SCAN", str(EnvDefaults.SKIP_SECRET_SCAN)).lower()
39
+ in ("true", "1", "yes", "on"),
38
40
  }
39
41
 
40
42
  return config
@@ -24,6 +24,7 @@ class EnvDefaults:
24
24
  MAX_OUTPUT_TOKENS: int = 512
25
25
  WARNING_LIMIT_TOKENS: int = 16384
26
26
  ALWAYS_INCLUDE_SCOPE: bool = False
27
+ SKIP_SECRET_SCAN: bool = False
27
28
 
28
29
 
29
30
  class Logging:
@@ -39,6 +40,7 @@ class Utility:
39
40
  DEFAULT_ENCODING: str = "cl100k_base" # llm encoding
40
41
  DEFAULT_DIFF_TOKEN_LIMIT: int = 15000 # Maximum tokens for diff processing
41
42
  MAX_WORKERS: int = os.cpu_count() or 4 # Maximum number of parallel workers
43
+ MAX_DISPLAYED_SECRET_LENGTH: int = 50 # Maximum length for displaying secrets
42
44
 
43
45
 
44
46
  class FilePatterns:
@@ -107,6 +107,12 @@ class FormattingError(GacError):
107
107
  exit_code = 5
108
108
 
109
109
 
110
+ class SecurityError(GacError):
111
+ """Error related to security issues (e.g., detected secrets)."""
112
+
113
+ exit_code = 6
114
+
115
+
110
116
  # Simplified error hierarchy - we use a single AIError class with error codes
111
117
  # instead of multiple subclasses for better maintainability
112
118
 
@@ -135,6 +141,8 @@ def handle_error(error: Exception, exit_program: bool = False, quiet: bool = Fal
135
141
  logger.error("Git operation failed. Please check your repository status.")
136
142
  elif isinstance(error, AIError):
137
143
  logger.error("AI operation failed. Please check your configuration and API keys.")
144
+ elif isinstance(error, SecurityError):
145
+ logger.error("Security scan detected potential secrets in staged changes.")
138
146
  else:
139
147
  logger.error("An unexpected error occurred.")
140
148
 
@@ -175,6 +183,7 @@ def format_error_for_user(error: Exception) -> str:
175
183
  ConfigError: "Please check your configuration settings.",
176
184
  GitError: "Please ensure Git is installed and you're in a valid Git repository.",
177
185
  FormattingError: "Please check that required formatters are installed.",
186
+ SecurityError: "Please remove or secure any detected secrets before committing.",
178
187
  }
179
188
 
180
189
  # Generic remediation for unexpected errors
@@ -45,4 +45,14 @@ def init() -> None:
45
45
  set_key(str(GAC_ENV_PATH), f"{provider_key.upper()}_API_KEY", api_key)
46
46
  click.echo(f"Set {provider_key.upper()}_API_KEY (hidden)")
47
47
 
48
+ # Ask about ZAI coding plan if Z.AI provider was selected
49
+ if provider_key == "zai":
50
+ use_coding_api = questionary.confirm(
51
+ "Are you using a Z.AI coding plan? (uses different API endpoint)",
52
+ default=False,
53
+ ).ask()
54
+ if use_coding_api:
55
+ set_key(str(GAC_ENV_PATH), "GAC_ZAI_USE_CODING_PLAN", "true")
56
+ click.echo("Set GAC_ZAI_USE_CODING_PLAN=true")
57
+
48
58
  click.echo(f"\ngac environment setup complete. You can edit {GAC_ENV_PATH} to update values later.")
@@ -23,10 +23,12 @@ from gac.git import (
23
23
  )
24
24
  from gac.preprocess import preprocess_diff
25
25
  from gac.prompt import build_prompt, clean_commit_message
26
+ from gac.security import get_affected_files, scan_staged_diff
26
27
 
27
28
  logger = logging.getLogger(__name__)
28
29
 
29
30
  config = load_config()
31
+ console = Console() # Initialize console globally to prevent undefined access
30
32
 
31
33
 
32
34
  def main(
@@ -41,6 +43,7 @@ def main(
41
43
  quiet: bool = False,
42
44
  dry_run: bool = False,
43
45
  no_verify: bool = False,
46
+ skip_secret_scan: bool = False,
44
47
  ) -> None:
45
48
  """Main application logic for gac."""
46
49
  try:
@@ -72,7 +75,6 @@ def main(
72
75
  # Check for staged files
73
76
  staged_files = get_staged_files(existing_only=False)
74
77
  if not staged_files:
75
- console = Console()
76
78
  console.print(
77
79
  "[yellow]No staged changes found. Stage your changes with git add first or use --add-all.[/yellow]"
78
80
  )
@@ -81,7 +83,6 @@ def main(
81
83
  # Run pre-commit hooks before doing expensive operations
82
84
  if not no_verify and not dry_run:
83
85
  if not run_pre_commit_hooks():
84
- console = Console()
85
86
  console.print("[red]Pre-commit hooks failed. Please fix the issues and try again.[/red]")
86
87
  console.print("[yellow]You can use --no-verify to skip pre-commit hooks.[/yellow]")
87
88
  sys.exit(1)
@@ -90,6 +91,72 @@ def main(
90
91
  diff = run_git_command(["diff", "--staged"])
91
92
  diff_stat = " " + run_git_command(["diff", "--stat", "--cached"])
92
93
 
94
+ # Security scan for secrets
95
+ if not skip_secret_scan:
96
+ logger.info("Scanning staged changes for potential secrets...")
97
+ secrets = scan_staged_diff(diff)
98
+ if secrets:
99
+ if not quiet:
100
+ console.print("\n[bold red]⚠️ SECURITY WARNING: Potential secrets detected![/bold red]")
101
+ console.print("[red]The following sensitive information was found in your staged changes:[/red]\n")
102
+
103
+ for secret in secrets:
104
+ location = f"{secret.file_path}:{secret.line_number}" if secret.line_number else secret.file_path
105
+ if not quiet:
106
+ console.print(f" • [yellow]{secret.secret_type}[/yellow] in [cyan]{location}[/cyan]")
107
+ console.print(f" Match: [dim]{secret.matched_text}[/dim]\n")
108
+
109
+ if not quiet:
110
+ console.print("\n[bold]Options:[/bold]")
111
+ console.print(" \\[a] Abort commit (recommended)")
112
+ console.print(" \\[c] [yellow]Continue anyway[/yellow] (not recommended)")
113
+ console.print(" \\[r] Remove affected file(s) and continue")
114
+
115
+ try:
116
+ choice = (
117
+ click.prompt(
118
+ "\nChoose an option",
119
+ type=click.Choice(["a", "c", "r"], case_sensitive=False),
120
+ default="a",
121
+ show_choices=True,
122
+ show_default=True,
123
+ )
124
+ .strip()
125
+ .lower()
126
+ )
127
+ except (EOFError, KeyboardInterrupt):
128
+ console.print("\n[red]Aborted by user.[/red]")
129
+ sys.exit(0)
130
+
131
+ if choice == "a":
132
+ console.print("[yellow]Commit aborted.[/yellow]")
133
+ sys.exit(0)
134
+ elif choice == "c":
135
+ console.print("[bold yellow]⚠️ Continuing with potential secrets in commit...[/bold yellow]")
136
+ logger.warning("User chose to continue despite detected secrets")
137
+ elif choice == "r":
138
+ affected_files = get_affected_files(secrets)
139
+ for file_path in affected_files:
140
+ try:
141
+ run_git_command(["reset", "HEAD", file_path])
142
+ console.print(f"[green]Unstaged: {file_path}[/green]")
143
+ except GitError as e:
144
+ console.print(f"[red]Failed to unstage {file_path}: {e}[/red]")
145
+
146
+ # Check if there are still staged files
147
+ remaining_staged = get_staged_files(existing_only=False)
148
+ if not remaining_staged:
149
+ console.print("[yellow]No files remain staged. Commit aborted.[/yellow]")
150
+ sys.exit(0)
151
+
152
+ console.print(f"[green]Continuing with {len(remaining_staged)} staged file(s)...[/green]")
153
+ # Refresh all git state variables after removing files
154
+ status = run_git_command(["status"])
155
+ diff = run_git_command(["diff", "--staged"])
156
+ diff_stat = " " + run_git_command(["diff", "--stat", "--cached"])
157
+ else:
158
+ logger.info("No secrets detected in staged changes")
159
+
93
160
  # Preprocess the diff before passing to build_prompt
94
161
  logger.debug(f"Preprocessing diff ({len(diff)} characters)")
95
162
  model_id = model or config["model"]
@@ -106,7 +173,6 @@ def main(
106
173
  )
107
174
 
108
175
  if show_prompt:
109
- console = Console()
110
176
  # Show both system and user prompts
111
177
  full_prompt = f"SYSTEM PROMPT:\n{system_prompt}\n\nUSER PROMPT:\n{user_prompt}"
112
178
  console.print(
@@ -123,7 +189,6 @@ def main(
123
189
 
124
190
  warning_limit = config.get("warning_limit_tokens", EnvDefaults.WARNING_LIMIT_TOKENS)
125
191
  if warning_limit and prompt_tokens > warning_limit:
126
- console = Console()
127
192
  console.print(
128
193
  f"[yellow]⚠️ WARNING: Prompt contains {prompt_tokens} tokens, which exceeds the warning limit of "
129
194
  f"{warning_limit} tokens.[/yellow]"
@@ -147,8 +212,6 @@ def main(
147
212
  logger.info("Generated commit message:")
148
213
  logger.info(commit_message)
149
214
 
150
- console = Console()
151
-
152
215
  # Reroll loop
153
216
  while True:
154
217
  console.print("[bold green]Generated commit message:[/bold green]")
@@ -250,7 +313,6 @@ def main(
250
313
  console.print("[green]Commit created successfully[/green]")
251
314
  except AIError as e:
252
315
  logger.error(str(e))
253
- console = Console()
254
316
  console.print(f"[red]Failed to generate commit message: {str(e)}[/red]")
255
317
  sys.exit(1)
256
318
 
@@ -32,6 +32,20 @@ def call_openrouter_api(model: str, messages: list[dict], temperature: float, ma
32
32
  response_data = response.json()
33
33
  return response_data["choices"][0]["message"]["content"]
34
34
  except httpx.HTTPStatusError as e:
35
- raise AIError.model_error(f"OpenRouter API error: {e.response.status_code} - {e.response.text}") from e
35
+ # Handle specific HTTP status codes
36
+ status_code = e.response.status_code
37
+ error_text = e.response.text
38
+
39
+ # Rate limiting
40
+ if status_code == 429:
41
+ raise AIError.rate_limit_error(f"OpenRouter API rate limit exceeded: {error_text}") from e
42
+ # Service unavailable
43
+ elif status_code in (502, 503):
44
+ raise AIError.connection_error(f"OpenRouter API service unavailable: {status_code} - {error_text}") from e
45
+ # Other HTTP errors
46
+ else:
47
+ raise AIError.model_error(f"OpenRouter API error: {status_code} - {error_text}") from e
48
+ except httpx.ConnectError as e:
49
+ raise AIError.connection_error(f"OpenRouter API connection error: {str(e)}") from e
36
50
  except Exception as e:
37
51
  raise AIError.model_error(f"Error calling OpenRouter API: {str(e)}") from e
@@ -13,7 +13,13 @@ def call_zai_api(model: str, messages: list[dict], temperature: float, max_token
13
13
  if not api_key:
14
14
  raise AIError.model_error("ZAI_API_KEY not found in environment variables")
15
15
 
16
- url = "https://api.z.ai/api/paas/v4/chat/completions"
16
+ # Support both regular and coding API endpoints
17
+ use_coding_api = os.getenv("GAC_ZAI_USE_CODING_PLAN", "false").lower() in ("true", "1", "yes", "on")
18
+ if use_coding_api:
19
+ url = "https://api.z.ai/api/coding/paas/v4/chat/completions"
20
+ else:
21
+ url = "https://api.z.ai/api/paas/v4/chat/completions"
22
+
17
23
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
18
24
 
19
25
  data = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens}
@@ -0,0 +1,293 @@
1
+ #!/usr/bin/env python3
2
+ """Security utilities for detecting secrets and API keys in git diffs.
3
+
4
+ This module provides functions to scan staged changes for potential secrets,
5
+ API keys, and other sensitive information that should not be committed.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from dataclasses import dataclass
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ @dataclass
16
+ class DetectedSecret:
17
+ """Represents a detected secret in a file."""
18
+
19
+ file_path: str
20
+ line_number: int | None
21
+ secret_type: str
22
+ matched_text: str
23
+ context: str | None = None
24
+
25
+
26
class SecretPatterns:
    """Regex patterns for detecting various types of secrets and API keys.

    Every class attribute that is a compiled pattern is treated as a detector
    by ``get_all_patterns``. ``EXCLUDED_PATTERNS`` is a plain list of patterns
    describing known false-positive shapes and is never used as a detector.
    """

    # AWS Access Keys
    AWS_ACCESS_KEY_ID = re.compile(r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)[\s:=\"']+([A-Z0-9]{20})", re.IGNORECASE)
    AWS_SECRET_ACCESS_KEY = re.compile(
        r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)[\s:=\"']+([A-Za-z0-9/+=]{40})", re.IGNORECASE
    )
    AWS_SESSION_TOKEN = re.compile(r"(?:AWS_SESSION_TOKEN|aws_session_token)[\s:=\"']+([A-Za-z0-9/+=]+)", re.IGNORECASE)

    # Generic API Keys
    GENERIC_API_KEY = re.compile(
        r"(?:api[-_]?key|api[-_]?secret|access[-_]?key|secret[-_]?key)[\s:=\"']+([A-Za-z0-9_\-]{20,})", re.IGNORECASE
    )

    # GitHub Tokens
    GITHUB_TOKEN = re.compile(r"(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}")

    # OpenAI API Keys
    OPENAI_API_KEY = re.compile(r"sk-[A-Za-z0-9]{20,}")

    # Anthropic API Keys
    ANTHROPIC_API_KEY = re.compile(r"sk-ant-[A-Za-z0-9\-_]{95,}")

    # Stripe Keys
    STRIPE_KEY = re.compile(r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{24,}")

    # Private Keys (PEM format)
    PRIVATE_KEY = re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----")

    # Bearer Tokens (require an actual token of 20+ alphanumerics).
    # The token must be followed by whitespace, a quote, or end of input. A
    # lookahead is used instead of consuming a whitespace character so that
    # tokens at the end of a line or right before a closing quote are detected.
    BEARER_TOKEN = re.compile(r"Bearer\s+[A-Za-z0-9]{20,}[/=]*(?=[\s\"']|$)", re.IGNORECASE)

    # JWT Tokens
    JWT_TOKEN = re.compile(r"eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+")

    # Database URLs with credentials.
    # The password class spells out every accepted punctuation character; the
    # hyphen is escaped so it cannot form an accidental character range.
    DATABASE_URL = re.compile(
        r"(?:postgresql|mysql|mongodb|redis)://[A-Za-z0-9_-]+:[A-Za-z0-9_@!#$%^&*()+,\-./:;<=]+@[A-Za-z0-9.-]+",
        re.IGNORECASE,
    )

    # SSH Private Keys
    SSH_PRIVATE_KEY = re.compile(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----")

    # Slack Tokens
    SLACK_TOKEN = re.compile(r"xox[baprs]-[A-Za-z0-9-]+")

    # Google API Keys
    GOOGLE_API_KEY = re.compile(r"AIza[0-9A-Za-z_-]{35}")

    # Twilio API Keys
    TWILIO_API_KEY = re.compile(r"SK[a-f0-9]{32}")

    # Generic Password Fields
    PASSWORD = re.compile(r"(?:password|passwd|pwd)[\s:=\"']+([^\s\"']{8,})", re.IGNORECASE)

    # Excluded patterns (common false positives)
    EXCLUDED_PATTERNS = [
        re.compile(r"(?:example|sample|dummy|placeholder|your[-_]?api[-_]?key)", re.IGNORECASE),
        re.compile(r"(?:xxx+|yyy+|zzz+)", re.IGNORECASE),
        re.compile(r"\b(?:123456|password|changeme|secret|testpass|admin)\b", re.IGNORECASE),  # Word boundaries
        re.compile(r"ghp_[a-f0-9]{16}", re.IGNORECASE),  # Short GitHub tokens (examples)
        re.compile(r"sk-[a-f0-9]{16}", re.IGNORECASE),  # Short OpenAI keys (examples)
        re.compile(r"Bearer Token", re.IGNORECASE),  # Documentation text
        re.compile(r"Add Bearer Token", re.IGNORECASE),  # Documentation text
        re.compile(r"Test Bearer Token", re.IGNORECASE),  # Documentation text
    ]

    @classmethod
    def get_all_patterns(cls) -> dict[str, re.Pattern]:
        """Get all secret detection patterns.

        Returns:
            Dictionary mapping human-readable pattern names (attribute name
            with underscores replaced by spaces, title-cased) to the compiled
            regex patterns. Attributes whose name starts with ``EXCLUDED`` and
            attributes that are not compiled patterns are left out.
        """
        return {
            name.replace("_", " ").title(): value
            for name, value in vars(cls).items()
            if isinstance(value, re.Pattern) and not name.startswith("EXCLUDED")
        }
109
+
110
+
111
def is_false_positive(matched_text: str, file_path: str = "") -> bool:
    """Decide whether a pattern match should be discarded as a false positive.

    Args:
        matched_text: The text that matched a secret pattern
        file_path: The file path where the match was found (for context-based filtering)

    Returns:
        True if the match is likely a false positive
    """
    # Any known placeholder/example shape disqualifies the match.
    if any(excluded.search(matched_text) for excluded in SecretPatterns.EXCLUDED_PATTERNS):
        return True

    # Long text built from at most three distinct characters (case-insensitive),
    # e.g. "xxxxxxxxxxxxxxxx", is treated as filler rather than a secret.
    distinct_chars = set(matched_text.lower())
    if len(matched_text) > 10 and len(distinct_chars) <= 3:
        return True

    # Matches inside example/template env files are ignored.
    template_markers = [".env.example", ".env.template", ".env.sample"]
    return any(marker in file_path for marker in template_markers)
135
+
136
+
137
+ def extract_file_path_from_diff_section(section: str) -> str | None:
138
+ """Extract the file path from a git diff section.
139
+
140
+ Args:
141
+ section: A git diff section
142
+
143
+ Returns:
144
+ The file path or None if not found
145
+ """
146
+ match = re.search(r"diff --git a/(.*?) b/", section)
147
+ if match:
148
+ return match.group(1)
149
+ return None
150
+
151
+
152
+ def extract_line_number_from_hunk(line: str, hunk_header: str | None) -> int | None:
153
+ """Extract the line number from a diff hunk.
154
+
155
+ Args:
156
+ line: The diff line containing the secret
157
+ hunk_header: The most recent hunk header (e.g., "@@ -1,4 +1,5 @@")
158
+
159
+ Returns:
160
+ The line number or None if not determinable
161
+ """
162
+ if not hunk_header:
163
+ return None
164
+
165
+ # Parse hunk header to get starting line number: @@ -old_start,old_count +new_start,new_count @@
166
+ match = re.search(r"@@ -\d+(?:,\d+)? \+(\d+)", hunk_header)
167
+ if not match:
168
+ return None
169
+
170
+ return int(match.group(1))
171
+
172
+
173
def scan_diff_section(section: str) -> list[DetectedSecret]:
    """Scan a single git diff section for secrets.

    Only added lines (prefixed with ``+``) are scanned. Line numbers refer to
    the new version of the file and are derived from the hunk headers, counting
    added and context lines.

    Args:
        section: A git diff section to scan

    Returns:
        List of detected secrets (empty when no file path can be determined)
    """
    # Kept as a function-local import, as in the original implementation.
    from gac.constants import Utility

    secrets: list[DetectedSecret] = []
    file_path = extract_file_path_from_diff_section(section)

    if not file_path:
        return secrets

    patterns = SecretPatterns.get_all_patterns()
    max_display = Utility.MAX_DISPLAYED_SECRET_LENGTH
    line_counter = 0
    in_hunk = False

    for line in section.split("\n"):
        # Hunk header: reset the counter to one before the first new-file line
        # so the increment below lands on the right number.
        if line.startswith("@@"):
            header = re.search(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
            if header:
                line_counter = int(header.group(1)) - 1
            in_hunk = True
            continue

        # "---"/"+++" are file metadata only before the first hunk. Inside a
        # hunk such a line is real content (e.g. an added line starting with
        # "++") and must be counted and scanned like any other.
        if not in_hunk and line.startswith(("+++", "---")):
            continue

        # Added and context lines both exist in the new file.
        if line.startswith(("+", " ")):
            line_counter += 1

        # Only scan added lines.
        if not line.startswith("+"):
            continue

        content = line[1:]  # Remove the '+' prefix for pattern matching
        for pattern_name, pattern in patterns.items():
            for match in pattern.finditer(content):
                matched_text = match.group(0)

                # Judge false positives on the secret VALUE when the pattern
                # captures one. The full match also contains the keyword
                # (e.g. "password", "secret-key"), which would itself hit the
                # exclusion list and suppress every such finding.
                candidate = match.group(1) if match.lastindex else matched_text
                if is_false_positive(candidate, file_path):
                    logger.debug(f"Skipping false positive: {matched_text}")
                    continue

                # Truncate matched text for display
                display_text = matched_text[:max_display] + "..." if len(matched_text) > max_display else matched_text

                secrets.append(
                    DetectedSecret(
                        file_path=file_path,
                        line_number=line_counter,
                        secret_type=pattern_name,
                        matched_text=display_text,
                        context=content.strip(),
                    )
                )

    return secrets
243
+
244
+
245
def scan_staged_diff(diff: str) -> list[DetectedSecret]:
    """Scan a staged git diff for secrets and API keys, file by file.

    Args:
        diff: The staged git diff to scan

    Returns:
        List of detected secrets
    """
    if not diff:
        return []

    # A section is only scanned when it has the diff --git header as well as
    # the --- and +++ lines.
    required_markers = (r"^diff --git ", r"^--- ", r"^\+\+\+ ")
    found: list[DetectedSecret] = []

    # Zero-width split keeps each "diff --git" header with its own section.
    for chunk in re.split(r"(?=^diff --git )", diff, flags=re.MULTILINE):
        if not chunk.strip():
            continue
        if all(re.search(marker, chunk, flags=re.MULTILINE) for marker in required_markers):
            found.extend(scan_diff_section(chunk))

    logger.info(f"Secret scan complete: found {len(found)} potential secrets")
    return found
281
+
282
+
283
def get_affected_files(secrets: list[DetectedSecret]) -> list[str]:
    """Collect the distinct file paths referenced by the detected secrets.

    Args:
        secrets: List of detected secrets

    Returns:
        Sorted list of unique file paths
    """
    unique_paths: set[str] = set()
    for finding in secrets:
        unique_paths.add(finding.file_path)
    ordered = list(unique_paths)
    ordered.sort()
    return ordered
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes