gac 0.17.2__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gac/__version__.py +1 -1
- gac/ai.py +69 -123
- gac/ai_utils.py +227 -0
- gac/auth_cli.py +69 -0
- gac/cli.py +87 -19
- gac/config.py +13 -7
- gac/config_cli.py +26 -5
- gac/constants.py +176 -5
- gac/errors.py +14 -0
- gac/git.py +207 -11
- gac/init_cli.py +52 -29
- gac/language_cli.py +378 -0
- gac/main.py +922 -189
- gac/model_cli.py +374 -0
- gac/oauth/__init__.py +1 -0
- gac/oauth/claude_code.py +397 -0
- gac/preprocess.py +5 -5
- gac/prompt.py +656 -219
- gac/providers/__init__.py +88 -0
- gac/providers/anthropic.py +51 -0
- gac/providers/azure_openai.py +97 -0
- gac/providers/cerebras.py +38 -0
- gac/providers/chutes.py +71 -0
- gac/providers/claude_code.py +102 -0
- gac/providers/custom_anthropic.py +133 -0
- gac/providers/custom_openai.py +98 -0
- gac/providers/deepseek.py +38 -0
- gac/providers/fireworks.py +38 -0
- gac/providers/gemini.py +87 -0
- gac/providers/groq.py +63 -0
- gac/providers/kimi_coding.py +63 -0
- gac/providers/lmstudio.py +59 -0
- gac/providers/minimax.py +38 -0
- gac/providers/mistral.py +38 -0
- gac/providers/moonshot.py +38 -0
- gac/providers/ollama.py +50 -0
- gac/providers/openai.py +38 -0
- gac/providers/openrouter.py +58 -0
- gac/providers/replicate.py +98 -0
- gac/providers/streamlake.py +51 -0
- gac/providers/synthetic.py +42 -0
- gac/providers/together.py +38 -0
- gac/providers/zai.py +59 -0
- gac/security.py +293 -0
- gac/utils.py +243 -4
- gac/workflow_utils.py +222 -0
- gac-3.6.0.dist-info/METADATA +281 -0
- gac-3.6.0.dist-info/RECORD +53 -0
- {gac-0.17.2.dist-info → gac-3.6.0.dist-info}/WHEEL +1 -1
- {gac-0.17.2.dist-info → gac-3.6.0.dist-info}/licenses/LICENSE +1 -1
- gac-0.17.2.dist-info/METADATA +0 -221
- gac-0.17.2.dist-info/RECORD +0 -20
- {gac-0.17.2.dist-info → gac-3.6.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Replicate API provider for gac."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from gac.errors import AIError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def call_replicate_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call the Replicate predictions API and return the generated text.

    Creates an async prediction, then polls its status endpoint until it
    succeeds, fails, or a 120-second polling budget is exhausted.

    Args:
        model: Replicate model version identifier (sent as "version").
        messages: Chat messages as dicts with "role" and "content" keys;
            they are flattened into a single Human/Assistant-style prompt.
        temperature: Sampling temperature forwarded to the model input.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The generated completion text.

    Raises:
        AIError: On missing credentials, HTTP/auth/rate-limit errors,
            timeouts, failed predictions, or empty output.
    """
    import time  # hoisted here instead of importing inside the polling loop

    api_key = os.getenv("REPLICATE_API_TOKEN")
    if not api_key:
        raise AIError.authentication_error("REPLICATE_API_TOKEN not found in environment variables")

    # Replicate uses a different endpoint for language models
    url = "https://api.replicate.com/v1/predictions"
    headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}

    # Convert messages to a single prompt for Replicate
    prompt_parts = []
    system_message = None

    for message in messages:
        role = message.get("role")
        content = message.get("content", "")

        if role == "system":
            system_message = content
        elif role == "user":
            prompt_parts.append(f"Human: {content}")
        elif role == "assistant":
            prompt_parts.append(f"Assistant: {content}")

    # Add system message at the beginning if present
    if system_message:
        prompt_parts.insert(0, f"System: {system_message}")

    # Add final assistant prompt so the model continues as the assistant
    prompt_parts.append("Assistant:")
    full_prompt = "\n\n".join(prompt_parts)

    # Replicate prediction payload
    data = {
        "version": model,  # Replicate uses version string as model identifier
        "input": {
            "prompt": full_prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
        },
    }

    try:
        # Create prediction
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        prediction_data = response.json()

        # Get the prediction URL to check status
        get_url = f"https://api.replicate.com/v1/predictions/{prediction_data['id']}"

        # Poll for completion (Replicate predictions are async)
        max_wait_time = 120
        wait_interval = 2
        elapsed_time = 0

        while elapsed_time < max_wait_time:
            get_response = httpx.get(get_url, headers=headers, timeout=120)
            get_response.raise_for_status()
            status_data = get_response.json()

            if status_data["status"] == "succeeded":
                content = status_data["output"]
                # Replicate language models commonly return output as a list
                # of string chunks; join them so we honor the -> str contract.
                if isinstance(content, list):
                    content = "".join(content)
                if not content:
                    raise AIError.model_error("Replicate API returned empty content")
                return content
            elif status_data["status"] == "failed":
                raise AIError.model_error(f"Replicate prediction failed: {status_data.get('error', 'Unknown error')}")
            elif status_data["status"] in ["starting", "processing"]:
                time.sleep(wait_interval)
                elapsed_time += wait_interval
            else:
                raise AIError.model_error(f"Replicate API returned unknown status: {status_data['status']}")

        raise AIError.timeout_error("Replicate API prediction timed out")

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"Replicate API rate limit exceeded: {e.response.text}") from e
        elif e.response.status_code == 401:
            raise AIError.authentication_error(f"Replicate API authentication failed: {e.response.text}") from e
        raise AIError.model_error(f"Replicate API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"Replicate API request timed out: {str(e)}") from e
    except AIError:
        # Bug fix: re-raise our own diagnostics unchanged. The generic
        # handler below used to catch and re-wrap them, mangling messages
        # like "empty content" into "Error calling Replicate API: ...".
        raise
    except Exception as e:
        raise AIError.model_error(f"Error calling Replicate API: {str(e)}") from e
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""StreamLake (Vanchin) API provider for gac."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from gac.errors import AIError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def call_streamlake_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call StreamLake (Vanchin) chat completions API.

    Args:
        model: Model identifier to request.
        messages: Chat messages as dicts with "role" and "content" keys.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The assistant message content from the first choice.

    Raises:
        AIError: On missing credentials, HTTP/rate-limit errors, timeouts,
            or a missing/empty content field in the response.
    """
    # STREAMLAKE_API_KEY is preferred; VC_API_KEY is accepted as an alias.
    api_key = os.getenv("STREAMLAKE_API_KEY") or os.getenv("VC_API_KEY")
    if not api_key:
        raise AIError.authentication_error(
            "STREAMLAKE_API_KEY not found in environment variables (VC_API_KEY alias also not set)"
        )

    url = "https://vanchin.streamlake.ai/api/gateway/v1/endpoints/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    data = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()
        choices = response_data.get("choices")
        if not choices:
            raise AIError.model_error("StreamLake API returned no choices")

        message = choices[0].get("message", {})
        content = message.get("content")
        if content is None:
            raise AIError.model_error("StreamLake API returned null content")
        if content == "":
            raise AIError.model_error("StreamLake API returned empty content")

        return content
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"StreamLake API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"StreamLake API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"StreamLake API request timed out: {str(e)}") from e
    except AIError:
        # Bug fix: the generic handler below previously caught and re-wrapped
        # the AIErrors raised above (no choices / null / empty content).
        raise
    except Exception as e:  # noqa: BLE001 - convert to AIError
        raise AIError.model_error(f"Error calling StreamLake API: {str(e)}") from e
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Synthetic.new API provider for gac."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from gac.errors import AIError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def call_synthetic_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call the Synthetic.new OpenAI-compatible chat completions API.

    Args:
        model: Model identifier; a "hf:" prefix is added when missing.
        messages: Chat messages as dicts with "role" and "content" keys.
        temperature: Sampling temperature.
        max_tokens: Maximum completion tokens (sent as "max_completion_tokens").

    Returns:
        The assistant message content from the first choice.

    Raises:
        AIError: On missing credentials, HTTP/rate-limit errors, timeouts,
            or null/empty content in the response.
    """
    # Handle model names without hf: prefix
    if not model.startswith("hf:"):
        model = f"hf:{model}"

    # SYNTHETIC_API_KEY is preferred; SYN_API_KEY works as an alias.
    api_key = os.getenv("SYNTHETIC_API_KEY") or os.getenv("SYN_API_KEY")
    if not api_key:
        raise AIError.authentication_error("SYNTHETIC_API_KEY or SYN_API_KEY not found in environment variables")

    url = "https://api.synthetic.new/openai/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    data = {"model": model, "messages": messages, "temperature": temperature, "max_completion_tokens": max_tokens}

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()
        content = response_data["choices"][0]["message"]["content"]
        if content is None:
            raise AIError.model_error("Synthetic.new API returned null content")
        if content == "":
            raise AIError.model_error("Synthetic.new API returned empty content")
        return content
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"Synthetic.new API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"Synthetic.new API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"Synthetic.new API request timed out: {str(e)}") from e
    except AIError:
        # Bug fix: don't let the generic handler below re-wrap the
        # null/empty-content AIErrors raised above.
        raise
    except Exception as e:
        raise AIError.model_error(f"Error calling Synthetic.new API: {str(e)}") from e
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Together AI API provider for gac."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from gac.errors import AIError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def call_together_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Call the Together AI chat completions API.

    Args:
        model: Model identifier to request.
        messages: Chat messages as dicts with "role" and "content" keys.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The assistant message content from the first choice.

    Raises:
        AIError: On missing credentials, HTTP/rate-limit errors, timeouts,
            or null/empty content in the response.
    """
    api_key = os.getenv("TOGETHER_API_KEY")
    if not api_key:
        raise AIError.authentication_error("TOGETHER_API_KEY not found in environment variables")

    url = "https://api.together.xyz/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    data = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens}

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()
        content = response_data["choices"][0]["message"]["content"]
        if content is None:
            raise AIError.model_error("Together AI API returned null content")
        if content == "":
            raise AIError.model_error("Together AI API returned empty content")
        return content
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"Together AI API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"Together AI API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"Together AI API request timed out: {str(e)}") from e
    except AIError:
        # Bug fix: re-raise the null/empty-content AIErrors from above
        # unchanged instead of re-wrapping them in the handler below.
        raise
    except Exception as e:
        raise AIError.model_error(f"Error calling Together AI API: {str(e)}") from e
|
gac/providers/zai.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Z.AI API provider for gac."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from gac.errors import AIError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _call_zai_api_impl(
    url: str, api_name: str, model: str, messages: list[dict], temperature: float, max_tokens: int
) -> str:
    """Shared implementation for the Z.AI chat completion endpoints.

    Args:
        url: Fully qualified chat completions endpoint to POST to.
        api_name: Human-readable API name used in error messages.
        model: Model identifier to request.
        messages: Chat messages as dicts with "role" and "content" keys.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The assistant message content from the first choice.

    Raises:
        AIError: On missing credentials, HTTP/rate-limit errors, timeouts,
            or an unexpected/empty response payload.
    """
    api_key = os.getenv("ZAI_API_KEY")
    if not api_key:
        raise AIError.authentication_error("ZAI_API_KEY not found in environment variables")

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    data = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens}

    try:
        response = httpx.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        response_data = response.json()

        # Handle different possible response structures
        if "choices" in response_data and len(response_data["choices"]) > 0:
            choice = response_data["choices"][0]
            if "message" in choice and "content" in choice["message"]:
                content = choice["message"]["content"]
                if content is None:
                    raise AIError.model_error(f"{api_name} API returned null content")
                if content == "":
                    raise AIError.model_error(f"{api_name} API returned empty content")
                return content
            else:
                raise AIError.model_error(f"{api_name} API response missing content: {response_data}")
        else:
            raise AIError.model_error(f"{api_name} API unexpected response structure: {response_data}")
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            raise AIError.rate_limit_error(f"{api_name} API rate limit exceeded: {e.response.text}") from e
        raise AIError.model_error(f"{api_name} API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.TimeoutException as e:
        raise AIError.timeout_error(f"{api_name} API request timed out: {str(e)}") from e
    except AIError:
        # Bug fix: the structural/empty-content AIErrors raised above were
        # previously caught by the generic handler below and re-wrapped,
        # mangling the diagnostic message.
        raise
    except Exception as e:
        raise AIError.model_error(f"Error calling {api_name} API: {str(e)}") from e
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def call_zai_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Send a chat completion request to the standard Z.AI endpoint."""
    return _call_zai_api_impl(
        "https://api.z.ai/api/paas/v4/chat/completions",
        "Z.AI",
        model,
        messages,
        temperature,
        max_tokens,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def call_zai_coding_api(model: str, messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Send a chat completion request to the Z.AI coding endpoint."""
    return _call_zai_api_impl(
        "https://api.z.ai/api/coding/paas/v4/chat/completions",
        "Z.AI coding",
        model,
        messages,
        temperature,
        max_tokens,
    )
|
gac/security.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Security utilities for detecting secrets and API keys in git diffs.
|
|
3
|
+
|
|
4
|
+
This module provides functions to scan staged changes for potential secrets,
|
|
5
|
+
API keys, and other sensitive information that should not be committed.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class DetectedSecret:
    """Represents a detected secret in a file.

    One instance is produced per pattern match on an added diff line by the
    scanning helpers in this module.
    """

    # Path of the file where the match occurred (taken from the
    # "diff --git a/..." header, so it is repo-relative).
    file_path: str
    # Line number in the new version of the file, derived from the hunk
    # header; None when it could not be determined.
    line_number: int | None
    # Human-readable name of the matching pattern, e.g. "Github Token".
    secret_type: str
    # The matched text, possibly truncated for display so full secrets
    # are not echoed back to the user.
    matched_text: str
    # The stripped content of the diff line containing the match, if known.
    context: str | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SecretPatterns:
    """Regex patterns for detecting various types of secrets and API keys.

    Each ``re.Pattern`` class attribute (except ``EXCLUDED_PATTERNS``) is a
    detector; ``get_all_patterns`` collects them into a name -> pattern map.
    ``EXCLUDED_PATTERNS`` lists matches that should be treated as false
    positives by ``is_false_positive``.
    """

    # AWS Access Keys
    AWS_ACCESS_KEY_ID = re.compile(r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)[\s:=\"']+([A-Z0-9]{20})", re.IGNORECASE)
    AWS_SECRET_ACCESS_KEY = re.compile(
        r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)[\s:=\"']+([A-Za-z0-9/+=]{40})", re.IGNORECASE
    )
    AWS_SESSION_TOKEN = re.compile(r"(?:AWS_SESSION_TOKEN|aws_session_token)[\s:=\"']+([A-Za-z0-9/+=]+)", re.IGNORECASE)

    # Generic API Keys (key-like variable name followed by a long value)
    GENERIC_API_KEY = re.compile(
        r"(?:api[-_]?key|api[-_]?secret|access[-_]?key|secret[-_]?key)[\s:=\"']+([A-Za-z0-9_\-]{20,})", re.IGNORECASE
    )

    # GitHub Tokens
    GITHUB_TOKEN = re.compile(r"(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}")

    # OpenAI API Keys
    OPENAI_API_KEY = re.compile(r"sk-[A-Za-z0-9]{20,}")

    # Anthropic API Keys
    ANTHROPIC_API_KEY = re.compile(r"sk-ant-[A-Za-z0-9\-_]{95,}")

    # Stripe Keys
    STRIPE_KEY = re.compile(r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{24,}")

    # Private Keys (PEM format)
    PRIVATE_KEY = re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----")

    # Bearer Tokens (require actual token with specific characteristics)
    BEARER_TOKEN = re.compile(r"Bearer\s+[A-Za-z0-9]{20,}[/=]*(?:\s|$)", re.IGNORECASE)

    # JWT Tokens (three base64url segments, header starting with "eyJ")
    JWT_TOKEN = re.compile(r"eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+")

    # Database URLs with credentials
    DATABASE_URL = re.compile(
        r"(?:postgresql|mysql|mongodb|redis)://[A-Za-z0-9_-]+:[A-Za-z0-9_@!#$%^&*()+-=]+@[A-Za-z0-9.-]+",
        re.IGNORECASE,
    )

    # SSH Private Keys
    # NOTE(review): overlaps with PRIVATE_KEY above for RSA/EC/OPENSSH
    # headers, so those lines may be reported under both names — confirm
    # whether deduplication is wanted.
    SSH_PRIVATE_KEY = re.compile(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----")

    # Slack Tokens
    SLACK_TOKEN = re.compile(r"xox[baprs]-[A-Za-z0-9-]+")

    # Google API Keys
    GOOGLE_API_KEY = re.compile(r"AIza[0-9A-Za-z_-]{35}")

    # Twilio API Keys
    TWILIO_API_KEY = re.compile(r"SK[a-f0-9]{32}")

    # Generic Password Fields
    PASSWORD = re.compile(r"(?:password|passwd|pwd)[\s:=\"']+([^\s\"']{8,})", re.IGNORECASE)

    # Excluded patterns (common false positives); consumed by
    # is_false_positive(), not by get_all_patterns().
    EXCLUDED_PATTERNS = [
        re.compile(r"(?:example|sample|dummy|placeholder|your[-_]?api[-_]?key)", re.IGNORECASE),
        re.compile(r"(?:xxx+|yyy+|zzz+)", re.IGNORECASE),
        re.compile(r"\b(?:123456|password|changeme|secret|testpass|admin)\b", re.IGNORECASE),  # Word boundaries
        re.compile(r"ghp_[a-f0-9]{16}", re.IGNORECASE),  # Short GitHub tokens (examples)
        re.compile(r"sk-[a-f0-9]{16}", re.IGNORECASE),  # Short OpenAI keys (examples)
        re.compile(r"Bearer Token", re.IGNORECASE),  # Documentation text
        re.compile(r"Add Bearer Token", re.IGNORECASE),  # Documentation text
        re.compile(r"Test Bearer Token", re.IGNORECASE),  # Documentation text
    ]

    @classmethod
    def get_all_patterns(cls) -> dict[str, re.Pattern]:
        """Get all secret detection patterns.

        Collects every compiled-regex class attribute except the
        EXCLUDED_* entries.

        Returns:
            Dictionary mapping human-readable pattern names (e.g.
            "Github Token" for GITHUB_TOKEN) to compiled regex patterns.
        """
        patterns = {}
        for name, value in vars(cls).items():
            if isinstance(value, re.Pattern) and not name.startswith("EXCLUDED"):
                # Convert pattern name to human-readable format
                # ("AWS_ACCESS_KEY_ID" -> "Aws Access Key Id").
                readable_name = name.replace("_", " ").title()
                patterns[readable_name] = value
        return patterns
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def is_false_positive(matched_text: str, file_path: str = "") -> bool:
    """Decide whether a pattern match is probably not a real secret.

    Args:
        matched_text: The text that matched a secret pattern.
        file_path: Path of the file containing the match; used to skip
            template files such as ``.env.example``.

    Returns:
        True when the match should be discarded as a false positive.
    """
    # Known placeholder phrasing ("example", "dummy", short demo keys, ...).
    if any(excluded.search(matched_text) for excluded in SecretPatterns.EXCLUDED_PATTERNS):
        return True

    # Long strings built from only a handful of distinct characters
    # (e.g. "xxxxxxxxxxxxxxxx") are filler rather than credentials.
    lowered = matched_text.lower()
    if len(lowered) > 10 and len(set(lowered)) <= 3:
        return True

    # Template env files are expected to contain placeholder values.
    template_markers = (".env.example", ".env.template", ".env.sample")
    return any(marker in file_path for marker in template_markers)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def extract_file_path_from_diff_section(section: str) -> str | None:
|
|
138
|
+
"""Extract the file path from a git diff section.
|
|
139
|
+
|
|
140
|
+
Args:
|
|
141
|
+
section: A git diff section
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
The file path or None if not found
|
|
145
|
+
"""
|
|
146
|
+
match = re.search(r"diff --git a/(.*?) b/", section)
|
|
147
|
+
if match:
|
|
148
|
+
return match.group(1)
|
|
149
|
+
return None
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def extract_line_number_from_hunk(line: str, hunk_header: str | None) -> int | None:
|
|
153
|
+
"""Extract the line number from a diff hunk.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
line: The diff line containing the secret
|
|
157
|
+
hunk_header: The most recent hunk header (e.g., "@@ -1,4 +1,5 @@")
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
The line number or None if not determinable
|
|
161
|
+
"""
|
|
162
|
+
if not hunk_header:
|
|
163
|
+
return None
|
|
164
|
+
|
|
165
|
+
# Parse hunk header to get starting line number: @@ -old_start,old_count +new_start,new_count @@
|
|
166
|
+
match = re.search(r"@@ -\d+(?:,\d+)? \+(\d+)", hunk_header)
|
|
167
|
+
if not match:
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
return int(match.group(1))
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def scan_diff_section(section: str) -> list[DetectedSecret]:
    """Scan a single git diff section for secrets.

    Walks the section line by line, tracking the new-file line number from
    hunk headers, and applies every pattern from
    SecretPatterns.get_all_patterns() to each ADDED line only.

    Args:
        section: A git diff section to scan (one file's worth of diff,
            including its "diff --git" header)

    Returns:
        List of detected secrets; empty when the section has no
        recognizable file header or no matches survive false-positive
        filtering
    """
    secrets: list[DetectedSecret] = []
    file_path = extract_file_path_from_diff_section(section)

    # Without a file path we cannot attribute findings, so skip the section.
    if not file_path:
        return secrets

    patterns = SecretPatterns.get_all_patterns()
    lines = section.split("\n")
    line_counter = 0

    for line in lines:
        # Track hunk headers for line number extraction
        if line.startswith("@@"):
            # Reset line counter based on hunk header (this is the starting line number in the new file)
            match = re.search(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
            if match:
                line_counter = int(match.group(1)) - 1  # Start one line before, will increment correctly
            continue

        # Skip metadata lines
        if line.startswith("+++") or line.startswith("---"):
            continue

        # Increment line counter for both added and context lines
        # (removed lines, starting with '-', do not exist in the new file
        # and therefore do not advance the new-file counter)
        if line.startswith("+") or line.startswith(" "):
            line_counter += 1

        # Only scan added lines (starting with '+')
        if line.startswith("+") and not line.startswith("+++"):
            # Check each pattern
            content = line[1:]  # Remove the '+' prefix for pattern matching
            for pattern_name, pattern in patterns.items():
                matches = pattern.finditer(content)
                for match in matches:
                    matched_text = match.group(0)

                    # Skip false positives (placeholders, template env files, ...)
                    if is_false_positive(matched_text, file_path):
                        logger.debug(f"Skipping false positive: {matched_text}")
                        continue

                    # Truncate matched text for display (avoid showing full secrets)
                    from gac.constants import Utility

                    display_text = (
                        matched_text[: Utility.MAX_DISPLAYED_SECRET_LENGTH] + "..."
                        if len(matched_text) > Utility.MAX_DISPLAYED_SECRET_LENGTH
                        else matched_text
                    )

                    secrets.append(
                        DetectedSecret(
                            file_path=file_path,
                            line_number=line_counter,
                            secret_type=pattern_name,
                            matched_text=display_text,
                            context=content.strip(),
                        )
                    )

    return secrets
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def scan_staged_diff(diff: str) -> list[DetectedSecret]:
    """Scan staged git diff for secrets and API keys.

    Args:
        diff: The staged git diff to scan

    Returns:
        List of detected secrets across all per-file sections of the diff
    """
    if not diff:
        return []

    # A real per-file diff section must carry all three of these headers;
    # anything missing one is skipped rather than scanned.
    required_markers = (r"^diff --git ", r"^--- ", r"^\+\+\+ ")

    all_secrets: list[DetectedSecret] = []
    # Split the diff into one section per file, keeping the header line.
    for section in re.split(r"(?=^diff --git )", diff, flags=re.MULTILINE):
        if not section.strip():
            continue
        if all(re.search(marker, section, flags=re.MULTILINE) for marker in required_markers):
            all_secrets.extend(scan_diff_section(section))

    logger.info(f"Secret scan complete: found {len(all_secrets)} potential secrets")
    return all_secrets
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def get_affected_files(secrets: list[DetectedSecret]) -> list[str]:
    """Return the unique file paths that contain detected secrets.

    Args:
        secrets: Detected secrets, possibly several per file

    Returns:
        Sorted list of unique file paths
    """
    return sorted({secret.file_path for secret in secrets})
|