permi 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- permi-0.1.1/PKG-INFO +9 -0
- permi-0.1.1/README.md +87 -0
- permi-0.1.1/ai_filter/__init__.py +0 -0
- permi-0.1.1/ai_filter/filter.py +71 -0
- permi-0.1.1/ai_filter/llm_client.py +123 -0
- permi-0.1.1/cli/__init__.py +0 -0
- permi-0.1.1/cli/formatter.py +123 -0
- permi-0.1.1/cli/main.py +133 -0
- permi-0.1.1/db/__init__.py +0 -0
- permi-0.1.1/db/database.py +95 -0
- permi-0.1.1/db/queries.py +112 -0
- permi-0.1.1/permi.egg-info/PKG-INFO +9 -0
- permi-0.1.1/permi.egg-info/SOURCES.txt +21 -0
- permi-0.1.1/permi.egg-info/dependency_links.txt +1 -0
- permi-0.1.1/permi.egg-info/entry_points.txt +2 -0
- permi-0.1.1/permi.egg-info/requires.txt +4 -0
- permi-0.1.1/permi.egg-info/top_level.txt +4 -0
- permi-0.1.1/pyproject.toml +21 -0
- permi-0.1.1/scanner/__init__.py +0 -0
- permi-0.1.1/scanner/engine.py +68 -0
- permi-0.1.1/scanner/rules.py +268 -0
- permi-0.1.1/scanner/scan.py +123 -0
- permi-0.1.1/setup.cfg +4 -0
permi-0.1.1/PKG-INFO
ADDED
permi-0.1.1/README.md
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Permi
|
|
2
|
+
|
|
3
|
+
**AI-powered vulnerability scanner for Nigerian developers and global SMBs.**
|
|
4
|
+
|
|
5
|
+
Permi scans your code for security vulnerabilities and uses AI to filter out
|
|
6
|
+
false positives — so you only see findings that actually matter.
|
|
7
|
+
|
|
8
|
+
Built in Nigeria. For Nigeria. Then for the world.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## What Permi detects
|
|
13
|
+
|
|
14
|
+
- SQL Injection (string concatenation, f-strings, % formatting)
|
|
15
|
+
- Cross-Site Scripting (innerHTML, document.write, Jinja2 |safe)
|
|
16
|
+
- Hardcoded secrets (passwords, API keys, AWS keys, Paystack/Flutterwave keys)
|
|
17
|
+
- Insecure practices (eval/exec, pickle.loads, SSL verification disabled, debug mode)
|
|
18
|
+
- USSD vulnerabilities (Nigerian-specific — unvalidated sessionId, phoneNumber, serviceCode)
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
```bash
|
|
24
|
+
pip install permi
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Requires Python 3.9+
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
**Scan a local project:**
|
|
34
|
+
```bash
|
|
35
|
+
permi scan --path ./myapp
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Scan a GitHub repository:**
|
|
39
|
+
```bash
|
|
40
|
+
permi scan --path https://github.com/user/repo
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
**Show only high severity findings:**
|
|
44
|
+
```bash
|
|
45
|
+
permi scan --path ./myapp --severity high
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Output as JSON (for CI/CD pipelines):**
|
|
49
|
+
```bash
|
|
50
|
+
permi scan --path ./myapp --output json
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Skip AI filter (offline mode):**
|
|
54
|
+
```bash
|
|
55
|
+
permi scan --path ./myapp --offline
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Setup
|
|
61
|
+
|
|
62
|
+
Permi uses [OpenRouter](https://openrouter.ai) for AI-powered false positive
|
|
63
|
+
filtering. Create a free account, generate an API key, and add it to a `.env`
|
|
64
|
+
file in your project root:
|
|
65
|
+
OPENROUTER_API_KEY=sk-or-your-key-here
|
|
66
|
+
|
|
67
|
+
No API key? Use `--offline` mode. All findings are shown unfiltered.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Example output
|
|
72
|
+
[1] [HIGH] SQL001 SQL Injection — string concatenation
|
|
73
|
+
File : app/auth.py
|
|
74
|
+
Line : 42
|
|
75
|
+
Code : cursor.execute("SELECT * FROM users WHERE name = " + username)
|
|
76
|
+
Why : Raw string concatenation used to build a SQL query.
|
|
77
|
+
AI : REAL User input is directly embedded into a SQL query with no sanitisation.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Built by
|
|
82
|
+
|
|
83
|
+
Peter N. D. — Cybersecurity student, University of Jos, Nigeria.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
*Permi is in active development. Feedback and contributions welcome.*
|
|
File without changes
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# ai_filter/filter.py
|
|
2
|
+
# Takes a list of raw findings, runs each through the LLM,
|
|
3
|
+
# saves the verdict back to the database, and returns only
|
|
4
|
+
# the findings the LLM marked as REAL.
|
|
5
|
+
|
|
6
|
+
from ai_filter.llm_client import analyse
|
|
7
|
+
from db.database import get_connection
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _update_finding_verdict(conn, finding_id: int, verdict: str, explanation: str) -> None:
|
|
11
|
+
"""Write the AI verdict back to the findings table."""
|
|
12
|
+
conn.execute("""
|
|
13
|
+
UPDATE findings
|
|
14
|
+
SET ai_verdict = ?,
|
|
15
|
+
ai_explanation = ?
|
|
16
|
+
WHERE id = ?
|
|
17
|
+
""", (verdict, explanation, finding_id))
|
|
18
|
+
conn.commit()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_filter(findings: list[dict], offline: bool = False) -> list[dict]:
    """
    Pass every finding through the AI filter and keep only the REAL ones.

    Args:
        findings: Raw findings list from the scanner.
        offline: If True, skip all API calls and return everything as REAL.

    Returns:
        Only the findings the LLM (or offline fallback) marked as REAL.
    """
    if not findings:
        return []

    if offline:
        print("[Permi] Offline mode — AI filter skipped, showing all findings.\n")
        return findings

    print(f"[Permi] Running AI filter on {len(findings)} finding(s)...\n")

    conn = get_connection()
    kept: list[dict] = []
    false_positives = 0
    total = len(findings)

    for position, item in enumerate(findings, start=1):
        print(f" [{position}/{total}] {item['rule_id']} "
              f"line {item['line_number']} — ", end="", flush=True)

        # Ask the LLM for a verdict on this single finding.
        analysed = analyse(item)
        verdict = analysed["ai_verdict"]
        explanation = analysed["ai_explanation"]
        print(f"{verdict} {explanation}")

        # Persist the verdict when the finding already has a DB row id.
        if "id" in item:
            _update_finding_verdict(conn, item["id"], verdict, explanation)

        if verdict == "REAL":
            kept.append(analysed)
        else:
            false_positives += 1

    conn.close()

    print(f"\n[Permi] Filter complete — "
          f"{len(kept)} real | {false_positives} false positive(s) removed\n")

    return kept
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# ai_filter/llm_client.py
|
|
2
|
+
# The only module in Permi that makes network requests.
|
|
3
|
+
# Sends a finding to OpenRouter and returns a verdict:
|
|
4
|
+
# REAL — this is a genuine vulnerability
|
|
5
|
+
# FP — this is a false positive, ignore it
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import json
|
|
9
|
+
import requests
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
|
|
12
|
+
load_dotenv()
|
|
13
|
+
|
|
14
|
+
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
|
|
15
|
+
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
16
|
+
|
|
17
|
+
# We use DeepSeek V3 — fast, cheap, and very good at code analysis.
|
|
18
|
+
# You can swap this for any model on openrouter.ai/models
|
|
19
|
+
MODEL = "deepseek/deepseek-chat"
|
|
20
|
+
|
|
21
|
+
# How many seconds to wait for the API before giving up
|
|
22
|
+
TIMEOUT = 30
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _build_prompt(finding: dict) -> str:
    """
    Build the prompt sent to the LLM for a single finding.

    Args:
        finding: Scanner finding dict; reads the rule_id, rule_name,
            severity, file, line_number, line_content and description keys.

    Returns:
        A prompt string instructing the model to reply with "REAL" or "FP"
        on the first line and a one-sentence reason on the second, so the
        caller can parse the response reliably.
    """
    # NOTE: the prompt wording below is part of the model contract —
    # changing it can change how reliably the model follows the
    # two-line reply format that analyse() parses.
    return f"""You are a senior application security engineer reviewing automated scan results.

A static analysis tool flagged the following finding. Your job is to decide if this is a REAL vulnerability or a FALSE POSITIVE (FP).

--- FINDING ---
Rule : {finding['rule_id']} — {finding['rule_name']}
Severity : {finding['severity']}
File : {finding['file']}
Line : {finding['line_number']}
Code : {finding['line_content']}
Detail : {finding['description']}
---------------

Instructions:
- Answer with exactly one word on the first line: REAL or FP
- On the second line, write one short sentence (max 20 words) explaining your verdict
- Do not write anything else

Example response:
REAL
The string concatenation directly embeds user input into a SQL query with no sanitisation.

Your verdict:"""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def analyse(finding: dict) -> dict:
    """
    Send one finding to the LLM and return the finding dict
    updated with ai_verdict and ai_explanation.

    If the API call fails for any reason (no key, network error,
    bad response), we default to REAL so nothing gets silently dropped.

    Args:
        finding: Scanner finding dict; mutated in place and returned.

    Returns:
        The same dict with "ai_verdict" ("REAL" or "FP") and
        "ai_explanation" set.
    """
    # If no API key is configured, skip the filter entirely
    if not OPENROUTER_API_KEY:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = "No API key — AI filter skipped."
        return finding

    prompt = _build_prompt(finding)

    try:
        response = requests.post(
            OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://github.com/permi",  # required by OpenRouter
                "X-Title": "Permi Security Scanner",
            },
            json={
                "model": MODEL,
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0,  # we want deterministic, not creative
                "max_tokens": 60,  # verdict + one sentence is plenty
            },
            timeout=TIMEOUT,
        )
        response.raise_for_status()

    except requests.exceptions.Timeout:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = "API timeout — defaulting to REAL."
        return finding

    except requests.exceptions.RequestException as e:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = f"API error — defaulting to REAL. ({e})"
        return finding

    # ── Parse the response ────────────────────────────────────────────────────
    try:
        content = response.json()["choices"][0]["message"]["content"].strip()
        lines = content.splitlines()

        raw_verdict = lines[0].strip().upper()
        explanation = lines[1].strip() if len(lines) > 1 else "No explanation provided."

        # Normalise — if the model returns anything unexpected, treat as REAL.
        # BUG FIX: keep the *model's* raw output for the message. Previously
        # `verdict` was overwritten to "REAL" before being interpolated, so the
        # explanation always read "Unexpected verdict 'REAL'".
        if raw_verdict in ("REAL", "FP"):
            verdict = raw_verdict
        else:
            verdict = "REAL"
            explanation = f"Unexpected verdict '{raw_verdict}' — defaulting to REAL."

        finding["ai_verdict"] = verdict
        finding["ai_explanation"] = explanation

    except (KeyError, IndexError, json.JSONDecodeError) as e:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = f"Parse error — defaulting to REAL. ({e})"

    return finding
|
|
File without changes
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# cli/formatter.py
|
|
2
|
+
# Handles all terminal output formatting.
|
|
3
|
+
# Keeps colour logic completely separate from scan logic.
|
|
4
|
+
|
|
5
|
+
from colorama import init, Fore, Style
|
|
6
|
+
|
|
7
|
+
# Initialise colorama — required on Windows for ANSI colours to work
|
|
8
|
+
init(autoreset=True)
|
|
9
|
+
|
|
10
|
+
# Severity colours
|
|
11
|
+
SEVERITY_COLOUR = {
|
|
12
|
+
"high": Fore.RED,
|
|
13
|
+
"medium": Fore.YELLOW,
|
|
14
|
+
"low": Fore.CYAN,
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
VERDICT_COLOUR = {
|
|
18
|
+
"REAL": Fore.RED,
|
|
19
|
+
"FP": Fore.GREEN,
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _divider(char="─", width=72, colour=Fore.WHITE):
    """Print a full-width horizontal rule in the given colour."""
    rule = char * width
    print(f"{colour}{rule}{Style.RESET_ALL}")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def print_banner():
    """Print the Permi header banner."""
    rows = (
        "┌─────────────────────────────────────────┐",
        "│ Permi — Security Scanner │",
        "│ Built in Nigeria. For the World. │",
        "└─────────────────────────────────────────┘",
    )
    print()
    for row in rows:
        print(Fore.CYAN + Style.BRIGHT + row)
    print()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def print_finding(finding: dict, index: int) -> None:
    """
    Print a single finding as a formatted block.

    The block contains a numbered header with a severity badge, the file
    location, the offending code snippet, the rule description, and —
    once the AI filter has run — the verdict with its explanation.
    """
    severity = finding.get("severity", "low")
    sev_colour = SEVERITY_COLOUR.get(severity, Fore.WHITE)

    _divider()

    # Header: "[n] [SEVERITY] RULE_ID Rule name"
    header = (
        Fore.WHITE + Style.BRIGHT + f" [{index}] "
        + sev_colour + Style.BRIGHT + f"[{severity.upper()}] "
        + Fore.WHITE + Style.BRIGHT + finding.get("rule_id", "")
        + Style.RESET_ALL + " "
        + finding.get("rule_name", "")
    )
    print(header)
    print()

    # Location, snippet and description rows share the same label layout.
    print(Fore.WHITE + " File : " + Style.RESET_ALL + finding.get("file", "unknown"))
    print(Fore.WHITE + " Line : " + Style.RESET_ALL + str(finding.get("line_number", "?")))
    print(
        Fore.WHITE + " Code : " + Style.RESET_ALL
        + Fore.YELLOW + finding.get("line_content", "") + Style.RESET_ALL
    )
    print(Fore.WHITE + " Why : " + Style.RESET_ALL + finding.get("description", ""))

    # The AI verdict is optional — absent until the filter has run.
    verdict = finding.get("ai_verdict")
    if verdict:
        verdict_colour = VERDICT_COLOUR.get(verdict, Fore.WHITE)
        print(
            Fore.WHITE + " AI : "
            + verdict_colour + Style.BRIGHT + verdict + Style.RESET_ALL
            + " " + finding.get("ai_explanation", "")
        )

    print()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def print_results_human(findings: list[dict]) -> None:
    """Print all findings in human-readable coloured format."""
    if not findings:
        print(Fore.GREEN + Style.BRIGHT + "\n ✅ No real vulnerabilities found.\n")
        return

    index = 1
    for finding in findings:
        print_finding(finding, index)
        index += 1

    _divider()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def print_summary(findings: list[dict], raw_count: int) -> None:
    """
    Print the final summary block.

    Args:
        findings: Findings that survived the AI filter.
        raw_count: Number of findings before filtering; the difference is
            reported as removed false positives.
    """
    # Count severities in a single pass. Use .get with a "low" default so a
    # finding without a "severity" key cannot crash the summary — this matches
    # how print_finding treats a missing severity.
    counts = {"high": 0, "medium": 0, "low": 0}
    for f in findings:
        sev = f.get("severity", "low")
        if sev in counts:
            counts[sev] += 1
    fp = raw_count - len(findings)

    print()
    _divider("═")
    print(Fore.WHITE + Style.BRIGHT + " SCAN SUMMARY")
    _divider("═")
    print(f" Total findings : {Style.BRIGHT}{len(findings)}{Style.RESET_ALL} "
          f"(filtered {fp} false positive(s))")
    print(f" {Fore.RED}High : {counts['high']}{Style.RESET_ALL}")
    print(f" {Fore.YELLOW}Medium : {counts['medium']}{Style.RESET_ALL}")
    print(f" {Fore.CYAN}Low : {counts['low']}{Style.RESET_ALL}")
    _divider("═")
    print()
|
permi-0.1.1/cli/main.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# cli/main.py
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
import click
|
|
5
|
+
from colorama import Fore, Style, init
|
|
6
|
+
|
|
7
|
+
init(autoreset=True)
|
|
8
|
+
|
|
9
|
+
from cli.formatter import print_banner, print_results_human, print_summary
|
|
10
|
+
from scanner.scan import scan
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.group()
def cli():
    """
    Permi — AI-powered vulnerability scanner.

    Scans code for vulnerabilities and uses AI to filter out false
    positives so you only see findings that actually matter.

    Built in Nigeria. For Nigeria. Then for the world.
    """
    # Group callback is intentionally empty — subcommands do all the work.
    # NOTE: this docstring doubles as the `permi --help` text shown by click,
    # so edits to it change user-visible output.
    pass
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# FIX: register the command once, directly under the documented name "scan".
# Previously the bare @cli.command() decorator auto-registered the command as
# "scan-cmd" AND a trailing cli.add_command(scan_cmd, name="scan") registered
# the same command a second time, so --help listed it twice.
@cli.command(name="scan")
@click.option("--path", "-p", required=True,
              help="Local directory path or GitHub URL to scan.")
@click.option("--output", "-o",
              type=click.Choice(["human", "json"], case_sensitive=False),
              default="human", show_default=True,
              help="Output format.")
@click.option("--severity", "-s",
              type=click.Choice(["high", "medium", "low", "all"], case_sensitive=False),
              default="all", show_default=True,
              help="Minimum severity level to display.")
@click.option("--offline", is_flag=True, default=False,
              help="Skip AI filter and show all raw findings.")
@click.option("--project", default=None,
              help="Project name to store in the database.")
def scan_cmd(path, output, severity, offline, project):
    """
    Scan a local directory or GitHub repo for vulnerabilities.

    Permi detects SQL injection, XSS, hardcoded secrets, insecure
    practices, and USSD vulnerabilities. An AI filter then removes
    false positives so only real issues are shown.

    \b
    EXAMPLES

      Scan a local project:
        permi scan --path ./myapp

      Scan a GitHub repo:
        permi scan --path https://github.com/user/repo

      High severity only:
        permi scan --path ./myapp --severity high

      Export as JSON for CI/CD:
        permi scan --path ./myapp --output json

      Skip AI filter (no API key needed):
        permi scan --path ./myapp --offline

      Name your project in the database:
        permi scan --path ./myapp --project my-api

    \b
    SEVERITY LEVELS

      high — SQL injection, hardcoded secrets, eval(), XSS, SSL disabled
      medium — debug mode, USSD input issues
      low — informational findings
      all — everything (default)

    \b
    EXIT CODES

      0 No high severity findings
      1 At least one high severity finding (useful for CI/CD pipelines)
    """
    if output == "human":
        print_banner()

    try:
        # Run the full scan pipeline
        findings, raw_count = scan(
            path=path,
            project_name=project,
            offline=offline,
        )

        # ── Severity filter ───────────────────────────────────────────────────
        order = {"high": 1, "medium": 2, "low": 3}

        if severity != "all":
            level = order[severity]
            findings = [
                f for f in findings
                if isinstance(f, dict) and order.get(f.get("severity", "low"), 99) <= level
            ]

        # ── Output ────────────────────────────────────────────────────────────
        if output == "json":
            # Drop None-valued keys (e.g. ai_verdict in offline mode) so the
            # JSON stays clean for CI consumers.
            clean = [
                {k: v for k, v in f.items() if v is not None}
                for f in findings
                if isinstance(f, dict)
            ]
            click.echo(json.dumps(clean, indent=2))

        else:
            print_results_human(findings)
            print_summary(findings, raw_count=raw_count)

        # Exit code 1 if any high severity findings — used by GitHub Action
        if any(f.get("severity") == "high" for f in findings if isinstance(f, dict)):
            sys.exit(1)

    # FIX: the two path-error handlers were byte-for-byte identical — merged
    # into a single tuple clause.
    except (FileNotFoundError, NotADirectoryError) as e:
        click.echo(Fore.RED + f"\n[Error] {e}\n")
        sys.exit(1)
    except Exception as e:
        click.echo(Fore.RED + f"\n[Unexpected error] {e}\n")
        sys.exit(1)
|
|
File without changes
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# db/database.py
|
|
2
|
+
# Handles all database creation and connection logic.
|
|
3
|
+
# The database file (permi.db) is created automatically in the
|
|
4
|
+
# project root the first time this module is imported.
|
|
5
|
+
|
|
6
|
+
import sqlite3
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
# The database lives in the project root folder
|
|
10
|
+
DB_PATH = Path(__file__).parent.parent / "permi.db"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_connection() -> sqlite3.Connection:
    """
    Open and return a connection to the local SQLite database.

    Rows come back as sqlite3.Row objects so columns can be read by name
    (row['severity']) as well as by index. Foreign-key enforcement is
    switched on per-connection because SQLite leaves it off by default.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA foreign_keys = ON")  # enforce relationships
    return connection
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def init_db() -> None:
    """
    Create all tables if they don't already exist.
    Safe to call every time the app starts — won't overwrite existing data.
    """
    # All DDL in one tuple; executed inside `with conn` so the whole schema
    # setup commits (or rolls back) as a single transaction.
    ddl_statements = (
        # projects — one row per codebase you want to scan repeatedly.
        """
            CREATE TABLE IF NOT EXISTS projects (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                path TEXT NOT NULL,
                created_at TEXT NOT NULL DEFAULT (datetime('now')),
                last_scan TEXT
            )
        """,
        # scan_results — one row per scan run, linked to its project.
        """
            CREATE TABLE IF NOT EXISTS scan_results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                project_id INTEGER NOT NULL REFERENCES projects(id),
                started_at TEXT NOT NULL DEFAULT (datetime('now')),
                finished_at TEXT,
                total_files INTEGER DEFAULT 0,
                total_findings INTEGER DEFAULT 0,
                status TEXT DEFAULT 'running'
            )
        """,
        # findings — one row per vulnerability found in a scan.
        # ai_verdict and ai_explanation stay NULL until Phase 1 fills them in.
        """
            CREATE TABLE IF NOT EXISTS findings (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                scan_id INTEGER NOT NULL REFERENCES scan_results(id),
                rule_id TEXT NOT NULL,
                rule_name TEXT NOT NULL,
                severity TEXT NOT NULL,
                description TEXT,
                file TEXT NOT NULL,
                line_number INTEGER NOT NULL,
                line_content TEXT,
                ai_verdict TEXT,
                ai_explanation TEXT,
                fix_suggestion TEXT,
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
        """,
        # feedback — manual corrections from the user:
        # 'confirmed' = real vulnerability, 'false_positive' = not a real issue.
        """
            CREATE TABLE IF NOT EXISTS feedback (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                finding_id INTEGER NOT NULL REFERENCES findings(id),
                verdict TEXT NOT NULL CHECK(verdict IN ('confirmed', 'false_positive')),
                note TEXT,
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
        """,
    )

    conn = get_connection()
    with conn:
        for statement in ddl_statements:
            conn.execute(statement)
    conn.close()
    print(f"Database ready: {DB_PATH}")
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# db/queries.py
|
|
2
|
+
# All database read/write functions used by the scanner.
|
|
3
|
+
# Every function takes a connection as its first argument
|
|
4
|
+
# so the caller controls when to open and close it.
|
|
5
|
+
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def create_project(conn, name: str, path: str) -> int:
    """
    Insert a new project and return its ID.
    If a project with the same path already exists, return its ID instead.
    """
    row = conn.execute(
        "SELECT id FROM projects WHERE path = ?", (path,)
    ).fetchone()
    if row is not None:
        return row["id"]

    inserted = conn.execute(
        "INSERT INTO projects (name, path) VALUES (?, ?)",
        (name, path)
    )
    conn.commit()
    return inserted.lastrowid
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def start_scan(conn, project_id: int) -> int:
    """
    Create a new scan_results row with status 'running'.
    Returns the scan ID.
    """
    result = conn.execute(
        "INSERT INTO scan_results (project_id, status) VALUES (?, 'running')",
        (project_id,)
    )
    conn.commit()
    return result.lastrowid
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def finish_scan(conn, scan_id: int, total_files: int, total_findings: int) -> None:
    """
    Mark a scan as completed and record the final counts.
    """
    sql = """
        UPDATE scan_results
        SET status = 'completed',
            finished_at = datetime('now'),
            total_files = ?,
            total_findings = ?
        WHERE id = ?
    """
    conn.execute(sql, (total_files, total_findings, scan_id))
    conn.commit()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def save_finding(conn, scan_id: int, finding: dict) -> int:
    """
    Insert one finding into the findings table.
    Returns the new finding's ID.
    """
    sql = """
        INSERT INTO findings (
            scan_id, rule_id, rule_name, severity, description,
            file, line_number, line_content, ai_verdict, ai_explanation
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    # ai_verdict / ai_explanation may be absent before the AI filter runs,
    # hence .get() (inserts NULL) while the scanner-produced keys are required.
    values = (
        scan_id,
        finding["rule_id"],
        finding["rule_name"],
        finding["severity"],
        finding["description"],
        finding["file"],
        finding["line_number"],
        finding["line_content"],
        finding.get("ai_verdict"),
        finding.get("ai_explanation"),
    )
    cursor = conn.execute(sql, values)
    conn.commit()
    return cursor.lastrowid
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_findings_for_scan(conn, scan_id: int) -> list:
    """
    Retrieve all findings for a given scan, ordered by severity then file.
    """
    sql = """
        SELECT * FROM findings
        WHERE scan_id = ?
        ORDER BY
            CASE severity
                WHEN 'high' THEN 1
                WHEN 'medium' THEN 2
                WHEN 'low' THEN 3
                ELSE 4
            END,
            file, line_number
    """
    return [dict(record) for record in conn.execute(sql, (scan_id,)).fetchall()]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def update_last_scan(conn, project_id: int) -> None:
    """
    Update the last_scan timestamp on a project after a scan completes.
    """
    sql = "UPDATE projects SET last_scan = datetime('now') WHERE id = ?"
    conn.execute(sql, (project_id,))
    conn.commit()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
ai_filter/__init__.py
|
|
4
|
+
ai_filter/filter.py
|
|
5
|
+
ai_filter/llm_client.py
|
|
6
|
+
cli/__init__.py
|
|
7
|
+
cli/formatter.py
|
|
8
|
+
cli/main.py
|
|
9
|
+
db/__init__.py
|
|
10
|
+
db/database.py
|
|
11
|
+
db/queries.py
|
|
12
|
+
permi.egg-info/PKG-INFO
|
|
13
|
+
permi.egg-info/SOURCES.txt
|
|
14
|
+
permi.egg-info/dependency_links.txt
|
|
15
|
+
permi.egg-info/entry_points.txt
|
|
16
|
+
permi.egg-info/requires.txt
|
|
17
|
+
permi.egg-info/top_level.txt
|
|
18
|
+
scanner/__init__.py
|
|
19
|
+
scanner/engine.py
|
|
20
|
+
scanner/rules.py
|
|
21
|
+
scanner/scan.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=42"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "permi"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "AI-powered vulnerability scanner for Nigerian developers"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"python-dotenv",
|
|
12
|
+
"colorama",
|
|
13
|
+
"click",
|
|
14
|
+
"requests",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
permi = "cli.main:cli"
|
|
19
|
+
|
|
20
|
+
[tool.setuptools.packages.find]
|
|
21
|
+
include = ["scanner*", "ai_filter*", "cli*", "db*"]
|
|
File without changes
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# scanner/engine.py
|
|
2
|
+
# The engine takes a file path, reads it line by line,
|
|
3
|
+
# and tests every line against every rule in RULES.
|
|
4
|
+
# Returns a list of finding dictionaries.
|
|
5
|
+
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from .rules import RULES, SCANNABLE_EXTENSIONS, SKIP_DIRS
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def scan_file(file_path: Path) -> list[dict]:
    """
    Scan a single file against all rules.
    Returns a list of findings (may be empty).
    """
    results: list[dict] = []

    # Only extensions we know how to scan are considered.
    if file_path.suffix.lower() not in SCANNABLE_EXTENSIONS:
        return results

    try:
        text = file_path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        # Unreadable file (permissions, filesystem error) — skip silently.
        return results

    for line_number, line in enumerate(text.splitlines(), start=1):
        for rule in RULES:
            if not rule["pattern"].search(line):
                continue
            results.append({
                "rule_id": rule["id"],
                "rule_name": rule["name"],
                "severity": rule["severity"],
                "description": rule["description"],
                "file": str(file_path),
                "line_number": line_number,
                "line_content": line.strip(),
                "ai_verdict": None,       # filled in Phase 1
                "ai_explanation": None,   # filled in Phase 1
            })

    return results
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def scan_directory(directory: Path) -> list[dict]:
    """
    Recursively walk a directory, scan every eligible file,
    and return all findings combined.

    Args:
        directory: Root directory to scan (resolved to an absolute path).

    Returns:
        All findings from every scanned file, in walk order.
    """
    all_findings: list[dict] = []
    directory = Path(directory).resolve()

    for file_path in directory.rglob("*"):

        # Only inspect path components BELOW the scan root. Testing the
        # absolute path's parts would wrongly skip an entire scan whose
        # root merely lives under an ancestor named e.g. "build" or "venv".
        below_root = file_path.relative_to(directory).parts

        # Skip any path that passes through a blocked directory
        if any(part in SKIP_DIRS for part in below_root[:-1]):
            continue

        # Skip directories themselves — only process files
        if not file_path.is_file():
            continue

        all_findings.extend(scan_file(file_path))

    return all_findings
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# scanner/rules.py
|
|
2
|
+
# Each rule has: id, name, pattern (regex), severity, description
|
|
3
|
+
# Patterns are intentionally broad — the AI filter in Phase 1 will
|
|
4
|
+
# remove false positives. Better to over-catch than under-catch here.
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
RULES = [

    # ── SQL INJECTION ─────────────────────────────────────────────────────────

    {
        "id": "SQL001",
        "name": "SQL Injection — string concatenation",
        "severity": "high",
        "description": (
            "Raw string concatenation used to build a SQL query. "
            "An attacker can inject arbitrary SQL through user input."
        ),
        # \b stops substring hits inside identifiers (e.g. "jquery(").
        "pattern": re.compile(
            r'\b(execute|query|cursor\.execute)\s*\(\s*["\'].*\+',
            re.IGNORECASE
        ),
    },
    {
        "id": "SQL002",
        "name": "SQL Injection — f-string or format() in query",
        "severity": "high",
        "description": (
            "An f-string or .format() call is used inside a SQL query. "
            "User-controlled variables embedded this way are injectable."
        ),
        "pattern": re.compile(
            r'\b(execute|query|cursor\.execute)\s*\(\s*f["\']',
            re.IGNORECASE
        ),
    },
    {
        "id": "SQL003",
        "name": "SQL Injection — % formatting in query",
        "severity": "high",
        "description": (
            "% string formatting is used to build a SQL query. "
            "This is a classic injection vector."
        ),
        "pattern": re.compile(
            r'\b(execute|query)\s*\(\s*["\'].*%\s*[(\w]',
            re.IGNORECASE
        ),
    },

    # ── CROSS-SITE SCRIPTING (XSS) ────────────────────────────────────────────

    {
        "id": "XSS001",
        "name": "XSS — innerHTML assignment",
        "severity": "high",
        "description": (
            "innerHTML is set dynamically. If any part of the value comes "
            "from user input, this is a direct XSS vector."
        ),
        "pattern": re.compile(
            r'\.innerHTML\s*=',
            re.IGNORECASE
        ),
    },
    {
        "id": "XSS002",
        "name": "XSS — document.write with variable",
        "severity": "high",
        "description": (
            "document.write() is called with a variable. "
            "Writing user-controlled content to the page enables XSS."
        ),
        "pattern": re.compile(
            r'document\.write\s*\(\s*\w',
            re.IGNORECASE
        ),
    },
    {
        "id": "XSS003",
        "name": "XSS — Flask render without escape (Jinja2 | safe filter)",
        "severity": "medium",
        "description": (
            "The Jinja2 |safe filter disables auto-escaping. "
            "If the variable contains user input, this enables XSS."
        ),
        "pattern": re.compile(
            r'\|\s*safe',
            re.IGNORECASE
        ),
    },

    # ── HARDCODED SECRETS ─────────────────────────────────────────────────────

    {
        "id": "SEC001",
        "name": "Hardcoded secret — generic password or key assignment",
        "severity": "high",
        "description": (
            "A variable named password, secret, api_key, or token is "
            "assigned a string literal. Hardcoded credentials are a "
            "critical exposure risk if the code is shared or pushed."
        ),
        # No \b here on purpose: names like "db_password" must still match.
        "pattern": re.compile(
            r'(password|passwd|secret|api_key|apikey|token|auth_key)'
            r'\s*=\s*["\'][^"\']{4,}["\']',
            re.IGNORECASE
        ),
    },
    {
        "id": "SEC002",
        "name": "Hardcoded secret — AWS key pattern",
        "severity": "high",
        "description": (
            "A string matching the format of an AWS Access Key ID was found. "
            "Exposed AWS keys can lead to full account compromise."
        ),
        # AWS Access Key IDs are case-sensitive: AKIA + 16 uppercase/digits.
        "pattern": re.compile(
            r'AKIA[0-9A-Z]{16}',
        ),
    },
    {
        "id": "SEC003",
        "name": "Hardcoded secret — private key header",
        "severity": "high",
        "description": (
            "A PEM private key header was found in the source code. "
            "Private keys must never be committed to a repository."
        ),
        "pattern": re.compile(
            r'-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----',
        ),
    },
    {
        "id": "SEC004",
        "name": "Hardcoded secret — Paystack or Flutterwave secret key",
        "severity": "high",
        "description": (
            "A Paystack or Flutterwave secret key pattern was found. "
            "Exposed payment gateway keys allow fraudulent transactions."
        ),
        "pattern": re.compile(
            r'(sk_live_|sk_test_)[a-zA-Z0-9]{20,}',
        ),
    },

    # ── USSD / NIGERIAN-SPECIFIC ──────────────────────────────────────────────

    {
        "id": "USSD001",
        "name": "USSD — missing input validation on sessionId or phoneNumber",
        "severity": "medium",
        "description": (
            "A USSD handler accesses sessionId or phoneNumber from the "
            "request without any visible validation. Unvalidated USSD "
            "inputs can be manipulated to hijack sessions or spoof callers."
        ),
        "pattern": re.compile(
            r'request\.(get|form|json|args)\s*[\.\[]\s*'
            r'["\']?(sessionId|phoneNumber|serviceCode)["\']?',
            re.IGNORECASE
        ),
    },
    {
        "id": "USSD002",
        "name": "USSD — wildcard or open-ended serviceCode handling",
        "severity": "medium",
        "description": (
            "A USSD serviceCode is compared to a wildcard or catch-all "
            "value. This may allow unintended service codes to trigger "
            "application logic."
        ),
        "pattern": re.compile(
            r'serviceCode\s*[=!]=\s*["\'][*\?]["\']',
            re.IGNORECASE
        ),
    },

    # ── INSECURE PRACTICES ────────────────────────────────────────────────────

    {
        "id": "INS001",
        "name": "Insecure — debug mode enabled in production",
        "severity": "medium",
        "description": (
            "debug=True is set, likely in a Flask or Django app. "
            "Debug mode exposes stack traces and an interactive console "
            "to anyone who triggers an error."
        ),
        "pattern": re.compile(
            r'debug\s*=\s*True',
            re.IGNORECASE
        ),
    },
    {
        "id": "INS002",
        "name": "Insecure — SSL/TLS verification disabled",
        "severity": "high",
        "description": (
            "verify=False is passed to a requests call. "
            "This disables certificate validation and exposes the app "
            "to man-in-the-middle attacks."
        ),
        "pattern": re.compile(
            r'requests\.\w+\(.*verify\s*=\s*False',
            re.IGNORECASE
        ),
    },
    {
        "id": "INS003",
        "name": "Insecure — use of eval() on external input",
        "severity": "high",
        "description": (
            "eval() is called with a variable argument. If the variable "
            "contains user-supplied data, this allows arbitrary code execution."
        ),
        # \b prevents false positives on identifiers like "retrieval(".
        "pattern": re.compile(
            r'\beval\s*\(\s*\w',
            re.IGNORECASE
        ),
    },
    {
        "id": "INS004",
        "name": "Insecure — use of exec() on external input",
        "severity": "high",
        "description": (
            "exec() is called with a variable argument — same risk as eval()."
        ),
        "pattern": re.compile(
            r'\bexec\s*\(\s*\w',
            re.IGNORECASE
        ),
    },
    {
        "id": "INS005",
        "name": "Insecure — pickle.loads() on untrusted data",
        "severity": "high",
        "description": (
            "pickle.loads() deserializes data. If the data comes from "
            "an untrusted source (network, user upload), this allows "
            "arbitrary code execution."
        ),
        "pattern": re.compile(
            r'pickle\.loads\s*\(',
            re.IGNORECASE
        ),
    },
]
|
|
250
|
+
|
|
251
|
+
# ── FILE EXTENSIONS TO SCAN ───────────────────────────────────────────────────
|
|
252
|
+
# Permi only reads these file types. Binary files, images, and lockfiles
|
|
253
|
+
# are skipped automatically.
|
|
254
|
+
|
|
255
|
+
# NOTE(review): pathlib treats dotfiles like ".env" as having NO suffix
# (Path(".env").suffix == ""), so any suffix-based check must fall back
# to the file name for the ".env" entry below to ever match — confirm
# callers handle this.
SCANNABLE_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx",   # Python and JS/TS source
    ".html", ".htm", ".php", ".java",      # markup and server-side code
    ".env", ".yml", ".yaml", ".json",      # config files that often hold secrets
}

# ── DIRECTORIES TO SKIP ───────────────────────────────────────────────────────
# These folders are never scanned — they contain dependencies or build
# artifacts, not your actual code.

SKIP_DIRS = {
    "node_modules", "venv", ".venv", "__pycache__",   # dependencies / bytecode
    ".git", "dist", "build", ".next", "target",       # VCS metadata / build output
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# scanner/scan.py
|
|
2
|
+
# Full scan pipeline — engine + AI filter combined.
|
|
3
|
+
# Supports local directories and GitHub URLs.
|
|
4
|
+
|
|
5
|
+
import tempfile
|
|
6
|
+
import subprocess
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from scanner.engine import scan_directory as run_engine
|
|
10
|
+
from ai_filter.filter import run_filter
|
|
11
|
+
from db.database import init_db, get_connection
|
|
12
|
+
from db.queries import (
|
|
13
|
+
create_project,
|
|
14
|
+
start_scan,
|
|
15
|
+
save_finding,
|
|
16
|
+
finish_scan,
|
|
17
|
+
update_last_scan,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _is_github_url(path: str) -> bool:
|
|
22
|
+
"""Return True if the path looks like a GitHub URL."""
|
|
23
|
+
p = path.strip().strip('"').strip("'")
|
|
24
|
+
return p.startswith("https://github.com/") or p.startswith("git@github.com:")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _clone_repo(url: str, target_dir: Path) -> None:
    """
    Clone a GitHub repo into target_dir.
    Raises RuntimeError if git is not installed or the clone fails.
    """
    print(f"[Permi] Cloning : {url}")
    try:
        result = subprocess.run(
            ["git", "clone", "--depth", "1", url, str(target_dir)],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError as err:
        # A missing git binary raises FileNotFoundError from subprocess;
        # translate it so callers get the documented RuntimeError contract.
        raise RuntimeError(
            "git clone failed: git is not installed or not on PATH"
        ) from err
    if result.returncode != 0:
        raise RuntimeError(f"git clone failed:\n{result.stderr.strip()}")
    print(f"[Permi] Cloned to: {target_dir}\n")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def scan(
    path: str,
    project_name: str = None,
    offline: bool = False,
) -> tuple[list[dict], int]:
    """
    Full scan pipeline.

    Args:
        path: Local directory or GitHub URL to scan.
        project_name: Optional display name; defaults to the directory
            or repository name.
        offline: Passed through to the AI filter (skips remote calls).

    Returns:
        (real_findings, raw_count)
        real_findings — findings the AI marked as REAL
        raw_count     — total before AI filtering (for accurate summary)

    Raises:
        FileNotFoundError / NotADirectoryError for a bad local path,
        RuntimeError if a GitHub clone fails.
    """
    # ── Normalise path (shells sometimes hand us a quoted string) ─────────────
    path = path.strip().strip('"').strip("'")

    tmp = None
    if _is_github_url(path):
        # GitHub URL — clone into a throwaway temp dir; cleaned up below
        # even if the scan raises.
        tmp = tempfile.TemporaryDirectory()
        target = Path(tmp.name) / "repo"
        _clone_repo(path, target)
        default_name = path.rstrip("/").split("/")[-1].replace(".git", "")
    else:
        target = Path(path).resolve()
        if not target.exists():
            raise FileNotFoundError(f"Path does not exist: {target}")
        if not target.is_dir():
            raise NotADirectoryError(f"Path is not a directory: {target}")
        default_name = target.name

    try:
        name = project_name or default_name

        # ── Init DB and create/find project + scan record ─────────────────────
        init_db()
        conn = get_connection()
        try:
            project_id = create_project(conn, name=name, path=str(target))
            scan_id = start_scan(conn, project_id)

            print(f"[Permi] Scanning : {target}")
            print(f"[Permi] Project : {name} (id={project_id})")
            print(f"[Permi] Scan : id={scan_id}\n")

            # ── Run the engine ────────────────────────────────────────────────
            raw_findings = run_engine(target)
            raw_count = len(raw_findings)
            # NOTE: counts only files that produced findings, not all files
            # visited by the engine.
            scanned_files = len({f["file"] for f in raw_findings}) if raw_findings else 0

            print(f"[Permi] Engine found {raw_count} raw finding(s) "
                  f"across {scanned_files} file(s)\n")

            # ── Save every raw finding to DB ──────────────────────────────────
            for finding in raw_findings:
                finding["id"] = save_finding(conn, scan_id, finding)
        finally:
            # Close before the (potentially slow) AI filter runs.
            conn.close()

        # ── Run AI filter ─────────────────────────────────────────────────────
        real_findings = run_filter(raw_findings, offline=offline)

        # ── Finish scan record ────────────────────────────────────────────────
        conn = get_connection()
        try:
            finish_scan(
                conn,
                scan_id=scan_id,
                total_files=scanned_files,
                total_findings=len(real_findings),
            )
            update_last_scan(conn, project_id)
        finally:
            conn.close()

        return real_findings, raw_count
    finally:
        # ── Clean up temp clone (also on error paths) ─────────────────────────
        if tmp:
            tmp.cleanup()
            print("[Permi] Temp clone deleted.\n")
|
permi-0.1.1/setup.cfg
ADDED