gac 0.17.2__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. gac/__version__.py +1 -1
  2. gac/ai.py +69 -123
  3. gac/ai_utils.py +227 -0
  4. gac/auth_cli.py +69 -0
  5. gac/cli.py +87 -19
  6. gac/config.py +13 -7
  7. gac/config_cli.py +26 -5
  8. gac/constants.py +176 -5
  9. gac/errors.py +14 -0
  10. gac/git.py +207 -11
  11. gac/init_cli.py +52 -29
  12. gac/language_cli.py +378 -0
  13. gac/main.py +922 -189
  14. gac/model_cli.py +374 -0
  15. gac/oauth/__init__.py +1 -0
  16. gac/oauth/claude_code.py +397 -0
  17. gac/preprocess.py +5 -5
  18. gac/prompt.py +656 -219
  19. gac/providers/__init__.py +88 -0
  20. gac/providers/anthropic.py +51 -0
  21. gac/providers/azure_openai.py +97 -0
  22. gac/providers/cerebras.py +38 -0
  23. gac/providers/chutes.py +71 -0
  24. gac/providers/claude_code.py +102 -0
  25. gac/providers/custom_anthropic.py +133 -0
  26. gac/providers/custom_openai.py +98 -0
  27. gac/providers/deepseek.py +38 -0
  28. gac/providers/fireworks.py +38 -0
  29. gac/providers/gemini.py +87 -0
  30. gac/providers/groq.py +63 -0
  31. gac/providers/kimi_coding.py +63 -0
  32. gac/providers/lmstudio.py +59 -0
  33. gac/providers/minimax.py +38 -0
  34. gac/providers/mistral.py +38 -0
  35. gac/providers/moonshot.py +38 -0
  36. gac/providers/ollama.py +50 -0
  37. gac/providers/openai.py +38 -0
  38. gac/providers/openrouter.py +58 -0
  39. gac/providers/replicate.py +98 -0
  40. gac/providers/streamlake.py +51 -0
  41. gac/providers/synthetic.py +42 -0
  42. gac/providers/together.py +38 -0
  43. gac/providers/zai.py +59 -0
  44. gac/security.py +293 -0
  45. gac/utils.py +243 -4
  46. gac/workflow_utils.py +222 -0
  47. gac-3.6.0.dist-info/METADATA +281 -0
  48. gac-3.6.0.dist-info/RECORD +53 -0
  49. {gac-0.17.2.dist-info → gac-3.6.0.dist-info}/WHEEL +1 -1
  50. {gac-0.17.2.dist-info → gac-3.6.0.dist-info}/licenses/LICENSE +1 -1
  51. gac-0.17.2.dist-info/METADATA +0 -221
  52. gac-0.17.2.dist-info/RECORD +0 -20
  53. {gac-0.17.2.dist-info → gac-3.6.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,98 @@
1
+ """Replicate API provider for gac."""
2
+
3
+ import os
4
+
5
+ import httpx
6
+
7
+ from gac.errors import AIError
8
+
9
+
10
def call_replicate_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Run a chat conversation through Replicate's predictions API and return the text.

    The chat messages are flattened into a single ``System:/Human:/Assistant:``
    prompt, a prediction is created with a POST request, and the prediction is
    then polled until it reaches a terminal status.

    Args:
        model: Sent as the ``version`` field of the prediction payload.
        messages: Chat messages as dicts with ``role`` and ``content`` keys.
            Roles other than ``system``, ``user`` and ``assistant`` are ignored;
            if several system messages are present only the last one is kept.
        temperature: Forwarded as ``input.temperature``.
        max_tokens: Forwarded as ``input.max_tokens``.

    Returns:
        The prediction output as a single string. When the API reports the
        output as a list of chunks, the chunks are concatenated.

    Raises:
        AIError: Built via ``authentication_error`` (missing token or HTTP 401),
            ``rate_limit_error`` (HTTP 429), ``timeout_error`` (HTTP timeout or
            polling budget exhausted) or ``model_error`` (everything else).
    """
    # Local import: this module does not import ``time`` at file level.
    import time

    api_key = os.getenv("REPLICATE_API_TOKEN")
    if not api_key:
        raise AIError.authentication_error("REPLICATE_API_TOKEN not found in environment variables")

    # Replicate uses a different endpoint for language models
    url = "https://api.replicate.com/v1/predictions"
    headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}

    # Convert messages to a single prompt for Replicate
    prompt_parts = []
    system_message = None
    for message in messages:
        role = message.get("role")
        content = message.get("content", "")
        if role == "system":
            system_message = content
        elif role == "user":
            prompt_parts.append(f"Human: {content}")
        elif role == "assistant":
            prompt_parts.append(f"Assistant: {content}")

    # The system message always leads the prompt, wherever it appeared in the input.
    if system_message:
        prompt_parts.insert(0, f"System: {system_message}")

    # A trailing bare "Assistant:" cues the model to produce the next turn.
    prompt_parts.append("Assistant:")
    full_prompt = "\n\n".join(prompt_parts)

    data = {
        "version": model,  # Replicate uses version string as model identifier
        "input": {
            "prompt": full_prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
        },
    }

    # Polling budget in seconds. Only time spent sleeping is counted, not the
    # duration of the status requests themselves.
    max_wait_time = 120
    wait_interval = 2

    try:
        # Create prediction
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        prediction_data = response.json()

        # Predictions are asynchronous: poll the prediction resource for its status.
        get_url = f"https://api.replicate.com/v1/predictions/{prediction_data['id']}"

        elapsed_time = 0
        while elapsed_time < max_wait_time:
            get_response = httpx.get(get_url, headers=headers, timeout=120)
            get_response.raise_for_status()
            status_data = get_response.json()
            status = status_data["status"]

            if status == "succeeded":
                output = status_data["output"]
                # The output may arrive as a list of text chunks; join them so
                # the declared ``str`` return type actually holds.
                if isinstance(output, list):
                    output = "".join(str(chunk) for chunk in output if chunk is not None)
                if not output:
                    raise AIError.model_error("Replicate API returned empty content")
                return output
            if status == "failed":
                raise AIError.model_error(f"Replicate prediction failed: {status_data.get('error', 'Unknown error')}")
            if status not in ("starting", "processing"):
                raise AIError.model_error(f"Replicate API returned unknown status: {status}")

            time.sleep(wait_interval)
            elapsed_time += wait_interval

        raise AIError.timeout_error("Replicate API prediction timed out")

    except AIError:
        # Errors raised above are already classified; without this clause the
        # generic handler below would re-wrap them (turning the polling
        # timeout into a model error).
        raise
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"Replicate API rate limit exceeded: {e.response.text}") from e
        elif e.response.status_code == 401:
            raise AIError.authentication_error(f"Replicate API authentication failed: {e.response.text}") from e
        raise AIError.model_error(f"Replicate API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"Replicate API request timed out: {str(e)}") from e
    except Exception as e:
        raise AIError.model_error(f"Error calling Replicate API: {str(e)}") from e
@@ -0,0 +1,51 @@
1
+ """StreamLake (Vanchin) API provider for gac."""
2
+
3
+ import os
4
+
5
+ import httpx
6
+
7
+ from gac.errors import AIError
8
+
9
+
10
def call_streamlake_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call StreamLake (Vanchin) chat completions API.

    Args:
        model: Model identifier sent in the request payload.
        messages: Chat messages, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.
        max_tokens: Forwarded as ``max_tokens``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        AIError: Built via ``authentication_error`` (no API key in the
            environment), ``rate_limit_error`` (HTTP 429), ``timeout_error``
            (HTTP timeout) or ``model_error`` (any other failure, including
            missing choices and null/empty content).
    """
    # STREAMLAKE_API_KEY takes precedence; VC_API_KEY is accepted as an alias.
    api_key = os.getenv("STREAMLAKE_API_KEY") or os.getenv("VC_API_KEY")
    if not api_key:
        raise AIError.authentication_error(
            "STREAMLAKE_API_KEY not found in environment variables (VC_API_KEY alias also not set)"
        )

    url = "https://vanchin.streamlake.ai/api/gateway/v1/endpoints/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    data = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()

        choices = response_data.get("choices")
        if not choices:
            raise AIError.model_error("StreamLake API returned no choices")

        message = choices[0].get("message", {})
        content = message.get("content")
        if content is None:
            raise AIError.model_error("StreamLake API returned null content")
        if content == "":
            raise AIError.model_error("StreamLake API returned empty content")

        return content
    except AIError:
        # Already classified above; let it through instead of re-wrapping it
        # in the generic handler (which would duplicate the message prefix).
        raise
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"StreamLake API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"StreamLake API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"StreamLake API request timed out: {str(e)}") from e
    except Exception as e:  # noqa: BLE001 - convert to AIError
        raise AIError.model_error(f"Error calling StreamLake API: {str(e)}") from e
@@ -0,0 +1,42 @@
1
+ """Synthetic.new API provider for gac."""
2
+
3
+ import os
4
+
5
+ import httpx
6
+
7
+ from gac.errors import AIError
8
+
9
+
10
def call_synthetic_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call Synthetic API directly.

    Args:
        model: Model identifier; ``hf:`` is prepended when it is not already
            the prefix.
        messages: Chat messages, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.
        max_tokens: Forwarded as ``max_completion_tokens``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        AIError: Built via ``authentication_error`` (no API key in the
            environment), ``rate_limit_error`` (HTTP 429), ``timeout_error``
            (HTTP timeout) or ``model_error`` (any other failure, including
            null/empty content).
    """
    # Handle model names without hf: prefix
    if not model.startswith("hf:"):
        model = f"hf:{model}"

    # SYNTHETIC_API_KEY takes precedence; SYN_API_KEY is the fallback.
    api_key = os.getenv("SYNTHETIC_API_KEY") or os.getenv("SYN_API_KEY")
    if not api_key:
        raise AIError.authentication_error("SYNTHETIC_API_KEY or SYN_API_KEY not found in environment variables")

    url = "https://api.synthetic.new/openai/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    data = {"model": model, "messages": messages, "temperature": temperature, "max_completion_tokens": max_tokens}

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()
        content = response_data["choices"][0]["message"]["content"]
        if content is None:
            raise AIError.model_error("Synthetic.new API returned null content")
        if content == "":
            raise AIError.model_error("Synthetic.new API returned empty content")
        return content
    except AIError:
        # Already classified above; let it through instead of re-wrapping it
        # in the generic handler (which would duplicate the message prefix).
        raise
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"Synthetic.new API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"Synthetic.new API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"Synthetic.new API request timed out: {str(e)}") from e
    except Exception as e:
        raise AIError.model_error(f"Error calling Synthetic.new API: {str(e)}") from e
@@ -0,0 +1,38 @@
1
+ """Together AI API provider for gac."""
2
+
3
+ import os
4
+
5
+ import httpx
6
+
7
+ from gac.errors import AIError
8
+
9
+
10
def call_together_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call Together AI API directly.

    Args:
        model: Model identifier sent in the request payload.
        messages: Chat messages, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.
        max_tokens: Forwarded as ``max_tokens``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        AIError: Built via ``authentication_error`` (``TOGETHER_API_KEY`` not
            set), ``rate_limit_error`` (HTTP 429), ``timeout_error`` (HTTP
            timeout) or ``model_error`` (any other failure, including
            null/empty content).
    """
    api_key = os.getenv("TOGETHER_API_KEY")
    if not api_key:
        raise AIError.authentication_error("TOGETHER_API_KEY not found in environment variables")

    url = "https://api.together.xyz/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    data = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens}

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()
        content = response_data["choices"][0]["message"]["content"]
        if content is None:
            raise AIError.model_error("Together AI API returned null content")
        if content == "":
            raise AIError.model_error("Together AI API returned empty content")
        return content
    except AIError:
        # Already classified above; let it through instead of re-wrapping it
        # in the generic handler (which would duplicate the message prefix).
        raise
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"Together AI API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"Together AI API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"Together AI API request timed out: {str(e)}") from e
    except Exception as e:
        raise AIError.model_error(f"Error calling Together AI API: {str(e)}") from e
gac/providers/zai.py ADDED
@@ -0,0 +1,59 @@
1
+ """Z.AI API provider for gac."""
2
+
3
+ import os
4
+
5
+ import httpx
6
+
7
+ from gac.errors import AIError
8
+
9
+
10
def _call_zai_api_impl(
    url: str, api_name: str, model: str, messages: list[dict], temperature: float, max_tokens: int
) -> str:
    """Internal implementation for Z.AI API calls.

    Args:
        url: Chat-completions endpoint to POST to.
        api_name: Human-readable API name, used only in error messages.
        model: Model identifier sent in the request payload.
        messages: Chat messages, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.
        max_tokens: Forwarded as ``max_tokens``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        AIError: Built via ``authentication_error`` (``ZAI_API_KEY`` not set),
            ``rate_limit_error`` (HTTP 429), ``timeout_error`` (HTTP timeout)
            or ``model_error`` (any other failure, including an unexpected
            response structure and null/empty content).
    """
    api_key = os.getenv("ZAI_API_KEY")
    if not api_key:
        raise AIError.authentication_error("ZAI_API_KEY not found in environment variables")

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    data = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens}

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()

        # Validate the response shape step by step so each failure gets its own message.
        if "choices" not in response_data or len(response_data["choices"]) == 0:
            raise AIError.model_error(f"{api_name} API unexpected response structure: {response_data}")

        choice = response_data["choices"][0]
        if "message" not in choice or "content" not in choice["message"]:
            raise AIError.model_error(f"{api_name} API response missing content: {response_data}")

        content = choice["message"]["content"]
        if content is None:
            raise AIError.model_error(f"{api_name} API returned null content")
        if content == "":
            raise AIError.model_error(f"{api_name} API returned empty content")
        return content
    except AIError:
        # Already classified above; let it through instead of re-wrapping it
        # in the generic handler (which would duplicate the message prefix).
        raise
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"{api_name} API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"{api_name} API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"{api_name} API request timed out: {str(e)}") from e
    except Exception as e:
        raise AIError.model_error(f"Error calling {api_name} API: {str(e)}") from e
48
+
49
+
50
def call_zai_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Send a chat completion request to the regular Z.AI endpoint.

    Thin wrapper that delegates to ``_call_zai_api_impl`` with the regular
    endpoint URL and the API name ``"Z.AI"`` (used in error messages).
    """
    return _call_zai_api_impl(
        "https://api.z.ai/api/paas/v4/chat/completions",
        "Z.AI",
        model,
        messages,
        temperature,
        max_tokens,
    )
54
+
55
+
56
def call_zai_coding_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Send a chat completion request to the Z.AI coding endpoint.

    Thin wrapper that delegates to ``_call_zai_api_impl`` with the coding
    endpoint URL and the API name ``"Z.AI coding"`` (used in error messages).
    """
    return _call_zai_api_impl(
        "https://api.z.ai/api/coding/paas/v4/chat/completions",
        "Z.AI coding",
        model,
        messages,
        temperature,
        max_tokens,
    )
gac/security.py ADDED
@@ -0,0 +1,293 @@
1
+ #!/usr/bin/env python3
2
+ """Security utilities for detecting secrets and API keys in git diffs.
3
+
4
+ This module provides functions to scan staged changes for potential secrets,
5
+ API keys, and other sensitive information that should not be committed.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from dataclasses import dataclass
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ @dataclass
16
+ class DetectedSecret:
17
+ """Represents a detected secret in a file."""
18
+
19
+ file_path: str
20
+ line_number: int | None
21
+ secret_type: str
22
+ matched_text: str
23
+ context: str | None = None
24
+
25
+
26
class SecretPatterns:
    """Namespace of compiled regexes used to spot secrets and API keys.

    Every class attribute that is a compiled pattern (and whose name does not
    start with ``EXCLUDED``) is a detection pattern; ``EXCLUDED_PATTERNS`` is a
    list of patterns describing common false positives. Attribute definition
    order determines the order returned by ``get_all_patterns``.
    """

    # --- AWS credentials (name followed by separators, then the value) ---
    AWS_ACCESS_KEY_ID = re.compile(
        r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)[\s:=\"']+([A-Z0-9]{20})",
        re.IGNORECASE,
    )
    AWS_SECRET_ACCESS_KEY = re.compile(
        r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)[\s:=\"']+([A-Za-z0-9/+=]{40})",
        re.IGNORECASE,
    )
    AWS_SESSION_TOKEN = re.compile(
        r"(?:AWS_SESSION_TOKEN|aws_session_token)[\s:=\"']+([A-Za-z0-9/+=]+)",
        re.IGNORECASE,
    )

    # --- Generic "api key"-style assignments ---
    GENERIC_API_KEY = re.compile(
        r"(?:api[-_]?key|api[-_]?secret|access[-_]?key|secret[-_]?key)[\s:=\"']+([A-Za-z0-9_\-]{20,})",
        re.IGNORECASE,
    )

    # --- Vendor tokens recognisable by prefix ---
    GITHUB_TOKEN = re.compile(r"(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}")
    OPENAI_API_KEY = re.compile(r"sk-[A-Za-z0-9]{20,}")
    ANTHROPIC_API_KEY = re.compile(r"sk-ant-[A-Za-z0-9\-_]{95,}")
    STRIPE_KEY = re.compile(r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{24,}")

    # --- PEM private key header ---
    PRIVATE_KEY = re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----")

    # --- Bearer tokens: "Bearer" followed by a 20+ character alphanumeric token ---
    BEARER_TOKEN = re.compile(r"Bearer\s+[A-Za-z0-9]{20,}[/=]*(?:\s|$)", re.IGNORECASE)

    # --- JWTs: three dot-separated segments, the first two starting with "eyJ" ---
    JWT_TOKEN = re.compile(r"eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+")

    # --- Database URLs with inline credentials ---
    # NOTE(review): inside the password character class, "+-=" forms a range
    # (from "+" to "="), so it also admits characters such as "." and "/".
    DATABASE_URL = re.compile(
        r"(?:postgresql|mysql|mongodb|redis)://[A-Za-z0-9_-]+:[A-Za-z0-9_@!#$%^&*()+-=]+@[A-Za-z0-9.-]+",
        re.IGNORECASE,
    )

    # --- SSH private key header (overlaps with PRIVATE_KEY for some key types) ---
    SSH_PRIVATE_KEY = re.compile(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----")

    # --- More vendor tokens ---
    SLACK_TOKEN = re.compile(r"xox[baprs]-[A-Za-z0-9-]+")
    GOOGLE_API_KEY = re.compile(r"AIza[0-9A-Za-z_-]{35}")
    TWILIO_API_KEY = re.compile(r"SK[a-f0-9]{32}")

    # --- Password-like assignments with a value of 8+ non-space characters ---
    PASSWORD = re.compile(r"(?:password|passwd|pwd)[\s:=\"']+([^\s\"']{8,})", re.IGNORECASE)

    # Patterns describing common false positives; a match that also matches
    # any of these is meant to be ignored by callers.
    EXCLUDED_PATTERNS = [
        re.compile(r"(?:example|sample|dummy|placeholder|your[-_]?api[-_]?key)", re.IGNORECASE),
        re.compile(r"(?:xxx+|yyy+|zzz+)", re.IGNORECASE),
        re.compile(r"\b(?:123456|password|changeme|secret|testpass|admin)\b", re.IGNORECASE),  # Word boundaries
        re.compile(r"ghp_[a-f0-9]{16}", re.IGNORECASE),  # Short GitHub tokens (examples)
        re.compile(r"sk-[a-f0-9]{16}", re.IGNORECASE),  # Short OpenAI keys (examples)
        re.compile(r"Bearer Token", re.IGNORECASE),  # Documentation text
        re.compile(r"Add Bearer Token", re.IGNORECASE),  # Documentation text
        re.compile(r"Test Bearer Token", re.IGNORECASE),  # Documentation text
    ]

    @classmethod
    def get_all_patterns(cls) -> dict[str, re.Pattern]:
        """Collect the detection patterns defined directly on this class.

        Returns:
            Mapping from a human-readable name (attribute name with
            underscores replaced by spaces, title-cased) to the compiled
            pattern, in attribute definition order.
        """
        return {
            attr_name.replace("_", " ").title(): attr_value
            for attr_name, attr_value in vars(cls).items()
            if isinstance(attr_value, re.Pattern) and not attr_name.startswith("EXCLUDED")
        }
109
+
110
+
111
def is_false_positive(matched_text: str, file_path: str = "") -> bool:
    """Decide whether a pattern match should be ignored as a likely false positive.

    Args:
        matched_text: The text that matched a secret pattern
        file_path: The file path where the match was found (for context-based filtering)

    Returns:
        True if the match is likely a false positive
    """
    # Known placeholder / documentation text.
    if any(excluded.search(matched_text) for excluded in SecretPatterns.EXCLUDED_PATTERNS):
        return True

    # Long matches built from at most three distinct characters
    # (case-insensitively), e.g. "xxxxxxxxxxxxxxxx".
    distinct_chars = set(matched_text.lower())
    if len(matched_text) > 10 and len(distinct_chars) <= 3:
        return True

    # Anything in an env template file (.env.example, .env.template, .env.sample) is ignored.
    template_markers = (".env.example", ".env.template", ".env.sample")
    return any(marker in file_path for marker in template_markers)
135
+
136
+
137
+ def extract_file_path_from_diff_section(section: str) -> str | None:
138
+ """Extract the file path from a git diff section.
139
+
140
+ Args:
141
+ section: A git diff section
142
+
143
+ Returns:
144
+ The file path or None if not found
145
+ """
146
+ match = re.search(r"diff --git a/(.*?) b/", section)
147
+ if match:
148
+ return match.group(1)
149
+ return None
150
+
151
+
152
+ def extract_line_number_from_hunk(line: str, hunk_header: str | None) -> int | None:
153
+ """Extract the line number from a diff hunk.
154
+
155
+ Args:
156
+ line: The diff line containing the secret
157
+ hunk_header: The most recent hunk header (e.g., "@@ -1,4 +1,5 @@")
158
+
159
+ Returns:
160
+ The line number or None if not determinable
161
+ """
162
+ if not hunk_header:
163
+ return None
164
+
165
+ # Parse hunk header to get starting line number: @@ -old_start,old_count +new_start,new_count @@
166
+ match = re.search(r"@@ -\d+(?:,\d+)? \+(\d+)", hunk_header)
167
+ if not match:
168
+ return None
169
+
170
+ return int(match.group(1))
171
+
172
+
173
def scan_diff_section(section: str) -> list[DetectedSecret]:
    """Scan a single git diff section for secrets.

    Only added lines (those starting with ``+``) are matched against the
    detection patterns. Line numbers refer to the new version of the file and
    are derived from the hunk headers.

    Args:
        section: A git diff section to scan

    Returns:
        List of detected secrets (empty when the section has no recognisable
        ``diff --git`` header)
    """
    # Imported inside the function as in the original code, but once per call
    # rather than once per match.
    from gac.constants import Utility

    secrets: list[DetectedSecret] = []
    file_path = extract_file_path_from_diff_section(section)
    if not file_path:
        return secrets

    patterns = SecretPatterns.get_all_patterns()
    max_shown = Utility.MAX_DISPLAYED_SECRET_LENGTH
    line_counter = 0
    # File headers ("--- a/x", "+++ b/x") appear only before the first hunk.
    # Inside a hunk, a line starting with "+++" is an added line whose content
    # starts with "++" and must be counted and scanned like any other.
    in_hunk = False

    for line in section.split("\n"):
        if line.startswith("diff --git "):
            # A new file header resets header detection (defensive: sections
            # normally contain a single file).
            in_hunk = False
            continue

        if line.startswith("@@"):
            # Reset line counter based on hunk header (this is the starting line number in the new file)
            header = re.search(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
            if header:
                line_counter = int(header.group(1)) - 1  # Start one line before, will increment correctly
            in_hunk = True
            continue

        # Skip file-header metadata lines (only meaningful before the first hunk)
        if not in_hunk and line.startswith(("+++", "---")):
            continue

        is_added = line.startswith("+")

        # Added and context lines both exist in the new file; removed lines do not.
        if is_added or line.startswith(" "):
            line_counter += 1

        if not is_added:
            continue

        content = line[1:]  # Remove the '+' prefix for pattern matching
        for pattern_name, pattern in patterns.items():
            for match in pattern.finditer(content):
                matched_text = match.group(0)

                # Skip false positives
                if is_false_positive(matched_text, file_path):
                    logger.debug(f"Skipping false positive: {matched_text}")
                    continue

                # Truncate matched text for display (avoid showing full secrets)
                display_text = matched_text[:max_shown] + "..." if len(matched_text) > max_shown else matched_text

                # NOTE(review): ``context`` carries the whole stripped line, so it
                # can still contain the untruncated secret.
                secrets.append(
                    DetectedSecret(
                        file_path=file_path,
                        line_number=line_counter,
                        secret_type=pattern_name,
                        matched_text=display_text,
                        context=content.strip(),
                    )
                )

    return secrets
243
+
244
+
245
def scan_staged_diff(diff: str) -> list[DetectedSecret]:
    """Scan a staged git diff, file section by file section, for secrets.

    Args:
        diff: The staged git diff to scan

    Returns:
        List of detected secrets across all sections (empty for an empty diff)
    """
    if not diff:
        return []

    # A chunk is only scanned when it looks like a real per-file diff section:
    # it must contain a "diff --git" header plus "---" and "+++" lines.
    required_markers = (r"^diff --git ", r"^--- ", r"^\+\+\+ ")

    findings = []
    # The zero-width lookahead splits in front of each header, keeping it with its section.
    for chunk in re.split(r"(?=^diff --git )", diff, flags=re.MULTILINE):
        if not chunk.strip():
            continue
        if not all(re.search(marker, chunk, flags=re.MULTILINE) for marker in required_markers):
            continue
        findings.extend(scan_diff_section(chunk))

    logger.info(f"Secret scan complete: found {len(findings)} potential secrets")
    return findings
281
+
282
+
283
def get_affected_files(secrets: list[DetectedSecret]) -> list[str]:
    """Return the distinct file paths that appear in the given detections.

    Args:
        secrets: List of detected secrets

    Returns:
        Sorted list of unique file paths
    """
    unique_paths = set()
    for finding in secrets:
        unique_paths.add(finding.file_path)

    ordered_paths = list(unique_paths)
    ordered_paths.sort()
    return ordered_paths